libavcodec.hg: i386/dsputil_mmx

annotate i386/dsputil_mmx_avg.h @ 394:e2cb8a4ee0c5 libavcodec

proper memory handling functions

author	glantau
date	Sat, 18 May 2002 22:59:50 +0000
parents	f874493a1970
children	92d143c2d5a8

rev	line source
0 986e461dc072 Initial revision glantau parents: diff changeset	1 /*
986e461dc072 Initial revision glantau parents: diff changeset	2 * DSP utils : average functions are compiled twice for 3dnow/mmx2
986e461dc072 Initial revision glantau parents: diff changeset	3 * Copyright (c) 2000, 2001 Gerard Lantau.
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	4 * Copyright (c) 2002 Michael Niedermayer
0 986e461dc072 Initial revision glantau parents: diff changeset	5 *
986e461dc072 Initial revision glantau parents: diff changeset	6 * This program is free software; you can redistribute it and/or modify
986e461dc072 Initial revision glantau parents: diff changeset	7 * it under the terms of the GNU General Public License as published by
986e461dc072 Initial revision glantau parents: diff changeset	8 * the Free Software Foundation; either version 2 of the License, or
986e461dc072 Initial revision glantau parents: diff changeset	9 * (at your option) any later version.
986e461dc072 Initial revision glantau parents: diff changeset	10 *
986e461dc072 Initial revision glantau parents: diff changeset	11 * This program is distributed in the hope that it will be useful,
986e461dc072 Initial revision glantau parents: diff changeset	12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
986e461dc072 Initial revision glantau parents: diff changeset	13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
986e461dc072 Initial revision glantau parents: diff changeset	14 * GNU General Public License for more details.
986e461dc072 Initial revision glantau parents: diff changeset	15 *
986e461dc072 Initial revision glantau parents: diff changeset	16 * You should have received a copy of the GNU General Public License
986e461dc072 Initial revision glantau parents: diff changeset	17 * along with this program; if not, write to the Free Software
986e461dc072 Initial revision glantau parents: diff changeset	18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
986e461dc072 Initial revision glantau parents: diff changeset	19 *
986e461dc072 Initial revision glantau parents: diff changeset	20 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	21 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
0 986e461dc072 Initial revision glantau parents: diff changeset	22 */
387 b8f3affeb8e1 shared lib support (req by kabi) ... michaelni parents: 386 diff changeset	23
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	24 /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	25 clobber bug */
/*
 * put_pixels_x2: horizontal half-pel "put".  For each row, the 8 source bytes
 * at pixels and the 8 bytes at pixels+1 are combined with PAVGB and the
 * result is stored to the matching row of block.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels, %2 = pixels+line_size,
 * %3 = block, %4 = block+line_size, %5 = line_size*2.  %eax is a byte offset
 * that starts at 0 and is advanced by %5 after every pair of rows.
 *
 * One pass of the loop handles two row pairs (four rows) and then subtracts 4
 * from h, looping while the result is non-zero, so h is expected to be a
 * positive multiple of 4.
 *
 * PAVGB and DEF are macros defined outside this listing (presumably the
 * MMX2/3DNow! byte-average mnemonic and the per-variant name suffix - confirm
 * in the including file).
 * NOTE(review): block and pixels are used as addresses below, so the '*'
 * declarators appear to be missing from this listing - confirm against the
 * real file.
 */
0 986e461dc072 Initial revision glantau parents: diff changeset	26 static void DEF(put_pixels_x2)(UINT8 block, const UINT8 pixels, int line_size, int h)
986e461dc072 Initial revision glantau parents: diff changeset	27 {
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	28 __asm __volatile(
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	29 "xorl %%eax, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	30 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	31 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	32 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	33 "movq 1(%1, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	34 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	35 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	36 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	37 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	38 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	39 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	40 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	41 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	42 "movq 1(%1, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	43 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	44 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	45 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	46 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	47 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	48 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	49 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	50 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	51 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	52 :"+g"(h)
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	53 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	54 "D"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	55 :"%eax", "memory");
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	56 }
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	57
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	58 /* GL: this function does incorrect rounding if overflow */
/*
 * put_no_rnd_pixels_x2: horizontal half-pel "put" without upward rounding.
 * Same data flow as the rounding x2 variant (8 bytes at pixels combined with
 * the 8 bytes at pixels+1, result stored to block), except that %mm7 is
 * subtracted from the first operand with unsigned saturation (psubusb)
 * before PAVGB is applied.
 *
 * %mm7 is loaded once by MOVQ_BONE, a macro defined outside this listing
 * (presumably a vector with every byte equal to 1 - confirm).  Because
 * psubusb saturates at 0, a source byte that is already 0 is not decremented;
 * this is presumably the "incorrect rounding" case the existing remark
 * refers to.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels, %2 = pixels+line_size,
 * %3 = block, %4 = block+line_size, %5 = line_size*2; %eax is the running
 * byte offset.  Four rows are processed per loop pass and h is decremented
 * by 4, so h is expected to be a positive multiple of 4.
 * NOTE(review): block and pixels are used as addresses below, so the '*'
 * declarators appear to be missing from this listing - confirm against the
 * real file.
 */
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	59 static void DEF(put_no_rnd_pixels_x2)(UINT8 block, const UINT8 pixels, int line_size, int h)
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	60 {
0 986e461dc072 Initial revision glantau parents: diff changeset	61 __asm __volatile(
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	62 "xorl %%eax, %%eax \n\t"
387 b8f3affeb8e1 shared lib support (req by kabi) ... michaelni parents: 386 diff changeset	63 MOVQ_BONE(%%mm7)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	64 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	65 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	66 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	67 "movq 1(%1, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	68 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	69 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	70 "psubusb %%mm7, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	71 "psubusb %%mm7, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	72 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	73 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	74 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	75 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	76 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	77 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	78 "movq 1(%1, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	79 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	80 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	81 "psubusb %%mm7, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	82 "psubusb %%mm7, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	83 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	84 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	85 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	86 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	87 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	88 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	89 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	90 :"+g"(h)
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	91 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	92 "D"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	93 :"%eax", "memory");
0 986e461dc072 Initial revision glantau parents: diff changeset	94 }
986e461dc072 Initial revision glantau parents: diff changeset	95
/*
 * put_pixels_y2: vertical half-pel "put".  Each output row of block is the
 * PAVGB of an 8-byte source row and the 8-byte source row directly below it.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels, %2 = pixels+line_size,
 * %3 = pixels+line_size*2, %4 = block, %5 = block+line_size,
 * %6 = line_size*2.  %eax is a byte offset that starts at 0 and is advanced
 * by %6 after every pair of output rows.
 *
 * The first source row is loaded into %mm0 before the loop.  Inside the loop
 * the registers %mm0/%mm1/%mm2 rotate so that the last row read in one pass
 * (left in %mm0) is reused as the top row of the next pass; each source row
 * is therefore loaded only once, and one more source row than h is read.
 * Four output rows are written per pass and h is decremented by 4, so h is
 * expected to be a positive multiple of 4.
 *
 * PAVGB and DEF are macros defined outside this listing.
 * NOTE(review): block and pixels are used as addresses below, so the '*'
 * declarators appear to be missing from this listing - confirm against the
 * real file.
 */
986e461dc072 Initial revision glantau parents: diff changeset	96 static void DEF(put_pixels_y2)(UINT8 block, const UINT8 pixels, int line_size, int h)
986e461dc072 Initial revision glantau parents: diff changeset	97 {
986e461dc072 Initial revision glantau parents: diff changeset	98 __asm __volatile(
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	99 "xorl %%eax, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	100 "movq (%1), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	101 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	102 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	103 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	104 "movq (%3, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	105 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	106 PAVGB" %%mm2, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	107 "movq %%mm0, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	108 "movq %%mm1, (%5, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	109 "addl %6, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	110 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	111 "movq (%3, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	112 PAVGB" %%mm1, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	113 PAVGB" %%mm0, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	114 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	115 "movq %%mm1, (%5, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	116 "addl %6, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	117 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	118 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	119 :"+g"(h)
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	120 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	121 "D" (block+line_size), "g"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	122 :"%eax", "memory");
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	123 }
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	124
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	125 /* GL: this function does incorrect rounding if overflow */
/*
 * put_no_rnd_pixels_y2: vertical half-pel "put" without upward rounding.
 * Same row rotation as the rounding y2 variant (each output row combines a
 * source row with the row below it), except that the freshly loaded middle
 * row in %mm1 has %mm7 subtracted from it with unsigned saturation (psubusb)
 * before it is used in the two PAVGB operations of that half-pass.
 *
 * %mm7 is loaded once by MOVQ_BONE, a macro defined outside this listing
 * (presumably a vector with every byte equal to 1 - confirm).  Because
 * psubusb saturates at 0, a byte that is already 0 is not decremented; this
 * is presumably the "incorrect rounding" case the existing remark refers to.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels, %2 = pixels+line_size,
 * %3 = pixels+line_size*2, %4 = block, %5 = block+line_size,
 * %6 = line_size*2; %eax is the running byte offset.  Four output rows are
 * written per loop pass and h is decremented by 4, so h is expected to be a
 * positive multiple of 4.
 * NOTE(review): block and pixels are used as addresses below, so the '*'
 * declarators appear to be missing from this listing - confirm against the
 * real file.
 */
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	126 static void DEF(put_no_rnd_pixels_y2)(UINT8 block, const UINT8 pixels, int line_size, int h)
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	127 {
0 986e461dc072 Initial revision glantau parents: diff changeset	128 __asm __volatile(
387 b8f3affeb8e1 shared lib support (req by kabi) ... michaelni parents: 386 diff changeset	129 MOVQ_BONE(%%mm7)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	130 "xorl %%eax, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	131 "movq (%1), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	132 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	133 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	134 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	135 "movq (%3, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	136 "psubusb %%mm7, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	137 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	138 PAVGB" %%mm2, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	139 "movq %%mm0, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	140 "movq %%mm1, (%5, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	141 "addl %6, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	142 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	143 "movq (%3, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	144 "psubusb %%mm7, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	145 PAVGB" %%mm1, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	146 PAVGB" %%mm0, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	147 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	148 "movq %%mm1, (%5, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	149 "addl %6, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	150 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	151 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	152 :"+g"(h)
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	153 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	154 "D" (block+line_size), "g"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	155 :"%eax", "memory");
0 986e461dc072 Initial revision glantau parents: diff changeset	156 }
986e461dc072 Initial revision glantau parents: diff changeset	157
/*
 * avg_pixels: full-pel "avg".  Each 8-byte row of block is replaced by the
 * PAVGB of the matching source row from pixels and the bytes already stored
 * in that row of block.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels, %2 = pixels+line_size,
 * %3 = block, %4 = block+line_size, %5 = line_size*2.  %eax is a byte offset
 * that starts at 0 and is advanced by %5 after every pair of rows.
 *
 * block is both read (into %mm3/%mm4) and written through %3/%4.  Four rows
 * are processed per loop pass and h is decremented by 4, so h is expected to
 * be a positive multiple of 4.
 *
 * PAVGB and DEF are macros defined outside this listing.
 * NOTE(review): block and pixels are used as addresses below, so the '*'
 * declarators appear to be missing from this listing - confirm against the
 * real file.
 */
986e461dc072 Initial revision glantau parents: diff changeset	158 static void DEF(avg_pixels)(UINT8 block, const UINT8 pixels, int line_size, int h)
986e461dc072 Initial revision glantau parents: diff changeset	159 {
986e461dc072 Initial revision glantau parents: diff changeset	160 __asm __volatile(
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	161 "xorl %%eax, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	162 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	163 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	164 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	165 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	166 "movq (%3, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	167 "movq (%4, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	168 PAVGB" %%mm3, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	169 PAVGB" %%mm4, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	170 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	171 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	172 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	173 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	174 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	175 "movq (%3, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	176 "movq (%4, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	177 PAVGB" %%mm3, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	178 PAVGB" %%mm4, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	179 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	180 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	181 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	182 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	183 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	184 :"+g"(h)
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	185 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	186 "D"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	187 :"%eax", "memory");
0 986e461dc072 Initial revision glantau parents: diff changeset	188 }
986e461dc072 Initial revision glantau parents: diff changeset	189
/*
 * avg_pixels_x2: horizontal half-pel "avg".  For each row, the 8 source bytes
 * at pixels and the 8 bytes at pixels+1 are combined with PAVGB; that result
 * is then combined with the bytes already in block by a second PAVGB and
 * written back to block.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels, %2 = pixels+line_size,
 * %3 = block, %4 = block+line_size, %5 = line_size*2.  %eax is a byte offset
 * that starts at 0 and is advanced by %5 after every pair of rows.
 *
 * %mm3 is reused: it first holds the pixels+1 data of the second row and is
 * then reloaded with the current contents of block.  Four rows are processed
 * per loop pass and h is decremented by 4, so h is expected to be a positive
 * multiple of 4.
 *
 * PAVGB and DEF are macros defined outside this listing.
 * NOTE(review): block and pixels are used as addresses below, so the '*'
 * declarators appear to be missing from this listing - confirm against the
 * real file.
 */
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	190 static void DEF(avg_pixels_x2)(UINT8 block, const UINT8 pixels, int line_size, int h)
0 986e461dc072 Initial revision glantau parents: diff changeset	191 {
986e461dc072 Initial revision glantau parents: diff changeset	192 __asm __volatile(
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	193 "xorl %%eax, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	194 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	195 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	196 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	197 "movq 1(%1, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	198 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	199 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	200 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	201 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	202 "movq (%3, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	203 "movq (%4, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	204 PAVGB" %%mm3, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	205 PAVGB" %%mm4, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	206 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	207 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	208 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	209 "movq (%1, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	210 "movq 1(%1, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	211 "movq (%2, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	212 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	213 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	214 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	215 "movq (%3, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	216 "movq (%4, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	217 PAVGB" %%mm3, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	218 PAVGB" %%mm4, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	219 "movq %%mm0, (%3, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	220 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	221 "addl %5, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	222 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	223 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	224 :"+g"(h)
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	225 :"b"(pixels), "c"(pixels+line_size), "d" (block), "S" (block+line_size),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	226 "D"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	227 :"%eax", "memory");
0 986e461dc072 Initial revision glantau parents: diff changeset	228 }
986e461dc072 Initial revision glantau parents: diff changeset	229
/*
 * avg_pixels_y2: vertical half-pel "avg".  Each 8-byte source row is combined
 * with the row directly below it by PAVGB; that result is then combined with
 * the bytes already in block by a second PAVGB and written back to block.
 *
 * Asm operands: %0 = h (row counter), %1 = pixels, %2 = pixels+line_size,
 * %3 = pixels+line_size*2, %4 = block, %5 = block+line_size,
 * %6 = line_size*2.  %eax is a byte offset that starts at 0 and is advanced
 * by %6 after every pair of output rows.
 *
 * The first source row is loaded into %mm0 before the loop, and the last row
 * read in each pass is left in %mm0 for the next pass, so each source row is
 * loaded only once and one more source row than h is read.  %mm3/%mm4 hold
 * the current contents of block.  Four output rows are written per pass and
 * h is decremented by 4, so h is expected to be a positive multiple of 4.
 *
 * PAVGB and DEF are macros defined outside this listing.
 * NOTE(review): block and pixels are used as addresses below, so the '*'
 * declarators appear to be missing from this listing - confirm against the
 * real file.
 */
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	230 static void DEF(avg_pixels_y2)(UINT8 block, const UINT8 pixels, int line_size, int h)
0 986e461dc072 Initial revision glantau parents: diff changeset	231 {
986e461dc072 Initial revision glantau parents: diff changeset	232 __asm __volatile(
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	233 "xorl %%eax, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	234 "movq (%1), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	235 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	236 "1: \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	237 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	238 "movq (%3, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	239 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	240 PAVGB" %%mm2, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	241 "movq (%4, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	242 "movq (%5, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	243 PAVGB" %%mm3, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	244 PAVGB" %%mm4, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	245 "movq %%mm0, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	246 "movq %%mm1, (%5, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	247 "addl %6, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	248 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	249 "movq (%3, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	250 PAVGB" %%mm1, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	251 PAVGB" %%mm0, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	252 "movq (%4, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	253 "movq (%5, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	254 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	255 PAVGB" %%mm4, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	256 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	257 "movq %%mm1, (%5, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	258 "addl %6, %%eax \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	259 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	260 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	261 :"+g"(h)
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	262 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	263 "D" (block+line_size), "g"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	264 :"%eax", "memory");
0 986e461dc072 Initial revision glantau parents: diff changeset	265 }
986e461dc072 Initial revision glantau parents: diff changeset	266
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	267 // Note: this is not correctly rounded, but this function is only used for B-frames, so it doesn't matter
/*
 * avg_pixels_xy2: averages a 2x2 neighbourhood of `pixels` and blends the
 * result into `block`, 8 bytes per row and four rows per loop iteration.
 *
 * For every output row the asm computes
 *   PAVGB( PAVGB(H[y], H[y+1]), block[y] )   with   H[y] = PAVGB(row y, row y shifted by 1 byte)
 * and stores it back over block[y].
 *
 * block     - destination rows; read, blended and rewritten in place
 * pixels    - source rows; each row is read at byte offsets 0 and 1, and one
 *             row more than is written gets read (rows 0..h)
 * line_size - byte stride, applied to both block and pixels
 * h         - number of rows; the loop subtracts 4 per pass and only leaves on
 *             exactly zero, so h has to be a positive multiple of 4
 *
 * block and pixels are used as addresses by the asm ("(%1)", "(%4, %%eax)"),
 * so they are declared as pointers.
 *
 * NOTE(review): PAVGB and MOVQ_BONE are macros defined outside this chunk.
 * PAVGB is presumably the packed byte-average instruction of the target CPU
 * and MOVQ_BONE presumably fills mm7 with 0x01 bytes -- confirm.
 */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	268 static void DEF(avg_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
0 986e461dc072 Initial revision glantau parents: diff changeset	269 {
986e461dc072 Initial revision glantau parents: diff changeset	270 __asm __volatile(
387 b8f3affeb8e1 shared lib support (req by kabi) ... michaelni parents: 386 diff changeset	271 MOVQ_BONE(%%mm7)
/* eax = running byte offset, shared by the source and destination pointers */
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	272 "xorl %%eax, %%eax \n\t"
/* mm0 = horizontal average of source row 0, carried into the loop */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	273 "movq (%1), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	274 "movq 1(%1), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	275 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	276 ".balign 16 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	277 "1: \n\t"
/* first half: load the next two source rows (%2, %3) and their 1-byte-shifted copies */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	278 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	279 "movq (%3, %%eax), %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	280 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	281 "movq 1(%3, %%eax), %%mm4 \n\t"
/* saturating subtract of mm7 from one row; done only in this half of the loop
   (NOTE(review): presumably a rounding compensation -- confirm) */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	282 "psubusb %%mm7, %%mm2 \n\t"
/* horizontal averages of the two rows, then vertical averages with the previous row */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	283 PAVGB" %%mm3, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	284 PAVGB" %%mm4, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	285 PAVGB" %%mm1, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	286 PAVGB" %%mm2, %%mm1 \n\t"
/* blend with the two destination rows (%4, %5) and write them back */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	287 "movq (%4, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	288 "movq (%5, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	289 PAVGB" %%mm3, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	290 PAVGB" %%mm4, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	291 "movq %%mm0, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	292 "movq %%mm1, (%5, %%eax) \n\t"
/* advance two rows (%6 = 2 * line_size) */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	293 "addl %6, %%eax \n\t"
/* second half: same steps with the roles of mm0 and mm2 swapped; mm2 still
   holds the last horizontal average of the first half, and mm0 ends up holding
   the one needed by the next iteration */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	294 "movq (%2, %%eax), %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	295 "movq (%3, %%eax), %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	296 "movq 1(%2, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	297 "movq 1(%3, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	298 PAVGB" %%mm3, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	299 PAVGB" %%mm4, %%mm0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	300 PAVGB" %%mm1, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	301 PAVGB" %%mm0, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	302 "movq (%4, %%eax), %%mm3 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	303 "movq (%5, %%eax), %%mm4 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	304 PAVGB" %%mm3, %%mm2 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	305 PAVGB" %%mm4, %%mm1 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	306 "movq %%mm2, (%4, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	307 "movq %%mm1, (%5, %%eax) \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	308 "addl %6, %%eax \n\t"
/* four rows done per pass; the loop leaves only when h reaches exactly 0 */
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	309 "subl $4, %0 \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	310 " jnz 1b \n\t"
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	311 :"+g"(h)
/* the five address operands are pinned to ebx/ecx/edx/esi/edi; eax is the
   scratch offset register and is therefore listed as clobbered */
389 f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	312 :"b"(pixels), "c"(pixels+line_size), "d"(pixels+line_size*2), "S" (block),
f874493a1970 tried to avoid gcc 2.95.2 bug by puting explicit register constraints - added comment about rounding bug in some functions (need to correct or suppress them for regression tests) glantau parents: 387 diff changeset	313 "D" (block+line_size), "g"(line_size<<1)
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	314 :"%eax", "memory");
0 986e461dc072 Initial revision glantau parents: diff changeset	315 }
986e461dc072 Initial revision glantau parents: diff changeset	316
386 f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	317 //Note: the sub* functions are not used
f49629bab18d hopefully faster mmx2&3dnow MC michaelni parents: 151 diff changeset	318
/*
 * sub_pixels_x2: subtracts a horizontally averaged row of `pixels` from
 * `block`, one 8-sample row per loop pass.
 *
 * Per row: the 8 bytes at pix are averaged (PAVGB) with the 8 bytes at pix+1,
 * widened to 16-bit words against the zero register mm7, and subtracted with
 * signed saturation (psubsw) from the 8 words at p; the result is stored back
 * over those words.
 *
 * block     - coefficients, 8 per row, rows stored back to back (p += 8)
 * pixels    - source rows, line_size bytes apart; 9 bytes are read per row
 * line_size - byte stride of pixels
 * h         - number of rows; the do/while tests --h after the first pass, so
 *             h must be at least 1
 *
 * block and pixels are assigned to the pointer locals p and pix, so they are
 * declared as pointers.
 *
 * NOTE(review): the "8%0" displacement and the word-sized psubsw assume
 * DCTELEM is a 16-bit type; PAVGB is a macro defined outside this chunk --
 * confirm both.
 */
0 986e461dc072 Initial revision glantau parents: diff changeset	319 static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
986e461dc072 Initial revision glantau parents: diff changeset	320 {
986e461dc072 Initial revision glantau parents: diff changeset	321 DCTELEM *p;
986e461dc072 Initial revision glantau parents: diff changeset	322 const UINT8 *pix;
986e461dc072 Initial revision glantau parents: diff changeset	323 p = block;
986e461dc072 Initial revision glantau parents: diff changeset	324 pix = pixels;
/* mm7 = 0, used below as the high half when widening bytes to words */
986e461dc072 Initial revision glantau parents: diff changeset	325 __asm __volatile(
151 ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer) nickols_k parents: 8 diff changeset	326 "pxor %%mm7, %%mm7":);
0 986e461dc072 Initial revision glantau parents: diff changeset	327 do {
986e461dc072 Initial revision glantau parents: diff changeset	328 __asm __volatile(
/* "1%1" / "8%0" put a displacement in front of the expanded memory operand:
   the bytes starting at pix+1, and the second group of four words at p */
986e461dc072 Initial revision glantau parents: diff changeset	329 "movq 1%1, %%mm2\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	330 "movq %0, %%mm0\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	331 PAVGB" %1, %%mm2\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	332 "movq 8%0, %%mm1\n\t"
/* split the 8 averaged bytes into two registers of four zero-extended words */
986e461dc072 Initial revision glantau parents: diff changeset	333 "movq %%mm2, %%mm3\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	334 "punpcklbw %%mm7, %%mm2\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	335 "punpckhbw %%mm7, %%mm3\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	336 "psubsw %%mm2, %%mm0\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	337 "psubsw %%mm3, %%mm1\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	338 "movq %%mm0, %0\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	339 "movq %%mm1, 8%0\n\t"
/* only *p and *pix are named as operands; the "memory" clobber covers the
   remaining bytes the asm reads and writes */
151 ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer) nickols_k parents: 8 diff changeset	340 :"+m"(*p)
0 986e461dc072 Initial revision glantau parents: diff changeset	341 :"m"(*pix)
986e461dc072 Initial revision glantau parents: diff changeset	342 :"memory");
986e461dc072 Initial revision glantau parents: diff changeset	343 pix += line_size;
986e461dc072 Initial revision glantau parents: diff changeset	344 p += 8;
986e461dc072 Initial revision glantau parents: diff changeset	345 } while (--h);
986e461dc072 Initial revision glantau parents: diff changeset	346 }
986e461dc072 Initial revision glantau parents: diff changeset	347
/*
 * sub_pixels_y2: subtracts a vertically averaged row of `pixels` from
 * `block`, one 8-sample row per loop pass.
 *
 * Per row: the 8 bytes at pix are averaged (PAVGB) with the 8 bytes one line
 * below (pix + line_size), widened to 16-bit words against the zero register
 * mm7, and subtracted with signed saturation (psubsw) from the 8 words at p;
 * the result is stored back over those words.
 *
 * block     - coefficients, 8 per row, rows stored back to back (p += 8)
 * pixels    - source rows, line_size bytes apart; rows 0..h are read
 * line_size - byte stride of pixels
 * h         - number of rows; the do/while tests --h after the first pass, so
 *             h must be at least 1
 *
 * block and pixels are assigned to the pointer locals p and pix, so they are
 * declared as pointers. The "m" inputs must name the pixel data itself
 * (*pix and *(pix+line_size)): an "m" operand on the pointer variable would
 * make movq/PAVGB read the bytes of the pointer instead of the pixels.
 *
 * NOTE(review): the "8%0" displacement and the word-sized psubsw assume
 * DCTELEM is a 16-bit type; PAVGB is a macro defined outside this chunk --
 * confirm both.
 */
986e461dc072 Initial revision glantau parents: diff changeset	348 static void DEF(sub_pixels_y2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
986e461dc072 Initial revision glantau parents: diff changeset	349 {
986e461dc072 Initial revision glantau parents: diff changeset	350 DCTELEM *p;
986e461dc072 Initial revision glantau parents: diff changeset	351 const UINT8 *pix;
986e461dc072 Initial revision glantau parents: diff changeset	352 p = block;
986e461dc072 Initial revision glantau parents: diff changeset	353 pix = pixels;
/* mm7 = 0, used below as the high half when widening bytes to words */
986e461dc072 Initial revision glantau parents: diff changeset	354 __asm __volatile(
151 ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer) nickols_k parents: 8 diff changeset	355 "pxor %%mm7, %%mm7":);
0 986e461dc072 Initial revision glantau parents: diff changeset	356 do {
986e461dc072 Initial revision glantau parents: diff changeset	357 __asm __volatile(
/* %2 = row below, %1 = current row; "8%0" is the second group of four words at p */
986e461dc072 Initial revision glantau parents: diff changeset	358 "movq %2, %%mm2\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	359 "movq %0, %%mm0\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	360 PAVGB" %1, %%mm2\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	361 "movq 8%0, %%mm1\n\t"
/* split the 8 averaged bytes into two registers of four zero-extended words */
986e461dc072 Initial revision glantau parents: diff changeset	362 "movq %%mm2, %%mm3\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	363 "punpcklbw %%mm7, %%mm2\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	364 "punpckhbw %%mm7, %%mm3\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	365 "psubsw %%mm2, %%mm0\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	366 "psubsw %%mm3, %%mm1\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	367 "movq %%mm0, %0\n\t"
986e461dc072 Initial revision glantau parents: diff changeset	368 "movq %%mm1, 8%0\n\t"
151 ae0516eadae2 fixed gcc-3.0.x compilation (by Michael Niedermayer) nickols_k parents: 8 diff changeset	369 :"+m"(*p)
0 986e461dc072 Initial revision glantau parents: diff changeset	370 :"m"(*pix), "m"(*(pix+line_size))
986e461dc072 Initial revision glantau parents: diff changeset	371 :"memory");
986e461dc072 Initial revision glantau parents: diff changeset	372 pix += line_size;
986e461dc072 Initial revision glantau parents: diff changeset	373 p += 8;
986e461dc072 Initial revision glantau parents: diff changeset	374 } while (--h);
986e461dc072 Initial revision glantau parents: diff changeset	375 }
986e461dc072 Initial revision glantau parents: diff changeset	376

Mercurial > libavcodec.hg

annotate i386/dsputil_mmx_avg.h @ 394:e2cb8a4ee0c5 libavcodec