libavcodec.hg: i386/cavsdsp

annotate i386/cavsdsp_mmx.c @ 3680:7690bafea6e0 libavcodec

Mark this file properly as LGPL as allowed by its author Leon van Stuivenberg. Clean up the outdated URLs in the header.

author	diego
date	Tue, 05 Sep 2006 13:50:36 +0000
parents	419409926166
children	c8c591fe26f8

rev	line source
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	1 /*
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	2 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	3 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	4 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	5 * MMX optimised DSP functions, based on H.264 optimisations by
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	6 * Michael Niedermayer and Loren Merritt
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	7 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	8 * This library is free software; you can redistribute it and/or
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	9 * modify it under the terms of the GNU Lesser General Public
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	10 * License as published by the Free Software Foundation; either
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	11 * version 2 of the License, or (at your option) any later version.
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	12 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	13 * This library is distributed in the hope that it will be useful,
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	16 * Lesser General Public License for more details.
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	17 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	18 * You should have received a copy of the GNU Lesser General Public
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	19 * License along with this library; if not, write to the Free Software
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	21 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	22
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	23 #include "../dsputil.h"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	24 #include "common.h"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	25
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	26 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_4 ) = 0x0004000400040004ULL;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	27 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_5 ) = 0x0005000500050005ULL;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	28 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_7 ) = 0x0007000700070007ULL;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	29 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_42) = 0x002A002A002A002AULL;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	30 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_64) = 0x0040004000400040ULL;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	31 DECLARE_ALIGNED_8(static const uint64_t,ff_pw_96) = 0x0060006000600060ULL;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	32
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	33 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	34 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	35 * inverse transform
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	36 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	37 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	38
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	39 #define SUMSUB_BA( a, b ) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	40 "paddw "#b", "#a" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	41 "paddw "#b", "#b" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	42 "psubw "#a", "#b" \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	43
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	44 #define SBUTTERFLY(a,b,t,n)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	45 "movq " #a ", " #t " \n\t" /* abcd */\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	46 "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	47 "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	48
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	49 #define TRANSPOSE4(a,b,c,d,t)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	50 SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	51 SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	52 SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	53 SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	54
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	55 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	56 {
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	57 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	58 "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	59 "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	60 "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	61 "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	62 "movq %%mm4, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	63 "movq %%mm5, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	64 "movq %%mm2, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	65 "movq %%mm7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	66
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	67 "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	68 "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	69 "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	70 "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	71 "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	72 "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	73 "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	74 "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	75 "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	76 "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	77 "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	78 "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	79
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	80 "movq %%mm5, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	81 "movq %%mm7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	82 "movq %%mm3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	83 "movq %%mm1, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	84 SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1 mm5 = a0 - a1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	85 "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	86 "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	87 "paddw %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	88 "paddw %%mm5, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	89 "paddw %%mm6, %%mm7 \n\t" /* mm7 = b4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	90 "paddw %%mm4, %%mm5 \n\t" /* mm5 = b5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	91
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	92 SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2 mm3 = a3 - a2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	93 "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	94 "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	95 "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	96 "paddw %%mm1, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	97 "paddw %%mm3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	98 "psubw %%mm2, %%mm1 \n\t" /* mm1 = b7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	99 "paddw %%mm0, %%mm3 \n\t" /* mm3 = b6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	100
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	101 "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	102 "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	103 "movq %%mm2, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	104 "movq %%mm6, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	105 "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	106 "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	107 "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	108 "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	109 "paddw %%mm2, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	110 "paddw %%mm0, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	111 "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	112 "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	113
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	114 "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	115 "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	116 SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0+src4 mm2 = src0-src4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	117 "psllw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	118 "psllw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	119 "paddw %1, %%mm0 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	120 "paddw %1, %%mm2 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	121
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	122 SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6 mm0 = a4 - a6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	123 SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7 mm2 = a5 - a7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	124 SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0 mm6 = dst7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	125 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	126 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	127 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	128 :: "r"(block), "m"(bias)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	129 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	130 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	131
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	132 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	133 {
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	134 int i;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	135 DECLARE_ALIGNED_8(int16_t, b2[64]);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	136
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	137 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	138 DECLARE_ALIGNED_8(uint64_t, tmp);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	139
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	140 cavs_idct8_1d(block+4*i, ff_pw_4);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	141
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	142 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	143 "psraw $3, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	144 "psraw $3, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	145 "psraw $3, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	146 "psraw $3, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	147 "psraw $3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	148 "psraw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	149 "psraw $3, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	150 "psraw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	151 "movq %%mm7, %0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	152 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	153 "movq %%mm0, 8(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	154 "movq %%mm6, 24(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	155 "movq %%mm7, 40(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	156 "movq %%mm4, 56(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	157 "movq %0, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	158 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	159 "movq %%mm7, (%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	160 "movq %%mm1, 16(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	161 "movq %%mm0, 32(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	162 "movq %%mm3, 48(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	163 : "=m"(tmp)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	164 : "r"(b2+32*i)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	165 : "memory"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	166 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	167 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	168
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	169 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	170 cavs_idct8_1d(b2+4*i, ff_pw_64);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	171
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	172 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	173 "psraw $7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	174 "psraw $7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	175 "psraw $7, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	176 "psraw $7, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	177 "psraw $7, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	178 "psraw $7, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	179 "psraw $7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	180 "psraw $7, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	181 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	182 "movq %%mm5, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	183 "movq %%mm3, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	184 "movq %%mm1, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	185 "movq %%mm0, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	186 "movq %%mm2, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	187 "movq %%mm4, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	188 "movq %%mm6, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	189 :: "r"(b2+4*i)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	190 : "memory"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	191 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	192 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	193
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	194 add_pixels_clamped_mmx(b2, dst, stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	195
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	196 /* clear block */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	197 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	198 "pxor %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	199 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	200 "movq %%mm7, 8(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	201 "movq %%mm7, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	202 "movq %%mm7, 24(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	203 "movq %%mm7, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	204 "movq %%mm7, 40(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	205 "movq %%mm7, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	206 "movq %%mm7, 56(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	207 "movq %%mm7, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	208 "movq %%mm7, 72(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	209 "movq %%mm7, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	210 "movq %%mm7, 88(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	211 "movq %%mm7, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	212 "movq %%mm7, 104(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	213 "movq %%mm7, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	214 "movq %%mm7, 120(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	215 :: "r" (block)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	216 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	217 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	218
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	219 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	220 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	221 * motion compensation
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	222 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	223 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	224
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	225 /* vertical filter [-1 -2 96 42 -7 0] */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	226 #define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	227 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	228 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	229 "pmullw %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	230 "movq "#D", %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	231 "pmullw %6, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	232 "psllw $3, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	233 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	234 "psraw $3, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	235 "paddw %%mm7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	236 "paddw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	237 "paddw "#B", "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	238 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	239 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	240 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	241 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	242 "psraw $1, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	243 "psubw "#A", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	244 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	245 "psraw $7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	246 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	247 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	248 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	249
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	250 /* vertical filter [ 0 -1 5 5 -1 0] */
/*
 * QPEL_CAVSV2 expands to the asm text for one output row of the filter
 * named above. A..F name the registers for six consecutive source rows,
 * held as 16-bit words. F is fetched from (%0) and widened here so that it
 * is available to later rows; it does not enter the sum of this row.
 *
 * Asm operands as used by this macro:
 *   %0  source pointer, advanced by %2 after the load
 *   %1  destination pointer, advanced by %3 after the store
 *   %4  constant added before the shift
 *   %5  multiplier applied to C + D
 *
 * Value produced: ((C + D) * %5 - B - E + %4) >> 3, packed to bytes with
 * unsigned saturation in mm6 and handed to OP with size suffix d.
 * mm7 is the unpack partner for F and therefore has to hold zero on entry.
 * A is passed to OP as its temp argument.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	251 #define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	252 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	253 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	254 "paddw "#D", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	255 "pmullw %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	256 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	257 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	258 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	259 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	260 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	261 "psraw $3, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	262 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	263 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	264 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	265
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	266 /* vertical filter [ 0 -7 42 96 -2 -1] */
/*
 * QPEL_CAVSV3 expands to the asm text for one output row of the filter
 * named above. A..F name the registers for six consecutive source rows,
 * held as 16-bit words; F is fetched from (%0) and widened here.
 *
 * Asm operands as used by this macro:
 *   %0  source pointer, advanced by %2 after the load
 *   %1  destination pointer, advanced by %3 after the store
 *   %4  constant added before the shift
 *   %5  multiplier applied to D
 *   %6  multiplier applied to C
 *
 * Value produced: (C * %6 + D * %5 - 7 * B - 2 * E - F + %4) >> 7, packed
 * to bytes with unsigned saturation in mm6 and handed to OP with size
 * suffix d.
 *
 * How the small coefficients are formed without extra registers:
 *   -7 * B : B is shifted left by 3 and subtracted, shifted right by 3
 *            again, then added once.
 *   -2 * E : E is doubled in place and subtracted, then shifted right by 1
 *            again.
 * mm7 is borrowed for the D * %5 product and cleared with pxor before it
 * is used as the unpack partner for F.
 * A is passed to OP as its temp argument.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	267 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	268 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	269 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	270 "pmullw %6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	271 "movq "#D", %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	272 "pmullw %5, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	273 "psllw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	274 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	275 "psraw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	276 "paddw %%mm7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	277 "paddw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	278 "paddw "#E", "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	279 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	280 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	281 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	282 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	283 "psraw $1, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	284 "psubw "#F", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	285 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	286 "psraw $7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	287 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	288 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	289 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	290
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	291
/*
 * QPEL_CAVSVNUM is the function body shared by the vertical filter
 * functions. It reads and modifies src and dst and reads srcStride,
 * dstStride and h, all of which come from the enclosing function.
 *
 *   VOP              per-row macro (QPEL_CAVSV1, QPEL_CAVSV2 or QPEL_CAVSV3)
 *   OP               store macro forwarded to VOP
 *   ADD, MUL1, MUL2  memory operands %4, %5 and %6 of the asm statements
 *
 * Register operands: %0 = src (eax), %1 = dst (ecx), %2 = srcStride (esi),
 * %3 = dstStride (edi).
 *
 * The loop runs twice (w = 2). Each pass covers a column 4 bytes wide: rows
 * are fetched with movd and both pointers move 4 bytes to the right at the
 * end of the pass. Before the loop src is moved up by two rows.
 *
 * The first asm statement clears mm7, loads five rows into mm0..mm4, widens
 * them to words and emits eight VOP rows, rotating the roles of mm0..mm5
 * from one row to the next. When h == 16 a second asm statement emits eight
 * more rows, continuing the same rotation.
 *
 * At the end of a pass src is rewound by h+5 rows (five preloaded rows plus
 * one load per VOP) and dst by h rows.
 *
 * NOTE(review): the second asm statement depends on the MMX register
 * contents left behind by the first one; neither statement lists MMX
 * registers among its outputs or clobbers.
 * NOTE(review): only h == 8 and h == 16 are distinguished. Any other value
 * still produces eight rows while the pointer rewind uses h.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	292 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	293 int w= 2;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	294 src -= 2*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	295 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	296 while(w--){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	297 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	298 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	299 "movd (%0), %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	300 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	301 "movd (%0), %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	302 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	303 "movd (%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	304 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	305 "movd (%0), %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	306 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	307 "movd (%0), %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	308 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	309 "punpcklbw %%mm7, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	310 "punpcklbw %%mm7, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	311 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	312 "punpcklbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	313 "punpcklbw %%mm7, %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	314 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	315 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	316 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	317 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	318 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	319 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	320 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	321 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	322 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	323 : "+a"(src), "+c"(dst)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	324 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	325 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	326 );\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	327 if(h==16){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	328 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	329 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	330 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	331 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	332 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	333 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	334 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	335 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	336 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	337 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	338 : "+a"(src), "+c"(dst)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	339 : "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	340 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	341 );\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	342 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	343 src += 4-(h+5)*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	344 dst += 4-h*dstStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	345 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	346
/*
 * QPEL_CAVS(OPNAME, OP, MMX) defines the filter functions for one
 * combination of name prefix (OPNAME), store macro (OP) and name suffix
 * (MMX):
 *
 *   OPNAME cavs_qpel8_h_ MMX            horizontal filter, 8 rows of 8 bytes
 *   OPNAME cavs_qpel8or16_v1/v2/v3_ MMX vertical filters built from
 *                                       QPEL_CAVSVNUM, taking the height h
 *   OPNAME cavs_qpel8_v1/v2/v3_ MMX     one call of the above with h = 8
 *   OPNAME cavs_qpel16_v1/v2/v3_ MMX    two calls with h = 16, the second
 *                                       one at dst+8 and src+8
 *   OPNAME cavs_qpel16_h_ MMX           four calls of the 8x8 horizontal
 *                                       function: left and right halves,
 *                                       then the same again 8 rows down
 *
 * Horizontal filter, for each row and each byte position x:
 *   ((src[x] + src[x+1]) * %5 - (src[x-1] + src[x+2]) + %6) >> 3
 * where %5 is ff_pw_5 (kept in mm6) and %6 is ff_pw_4. The low and high
 * halves of every 8-byte load are widened against the zeroed mm7 and
 * processed in separate registers, then packed together with unsigned
 * saturation and handed to OP with size suffix q and mm5 as temp. The row
 * counter h is a memory operand that decl decrements until it reaches zero.
 *
 * Vertical constants (ADD, MUL1, MUL2): v1 and v3 pass ff_pw_64, ff_pw_96,
 * ff_pw_42; v2 passes ff_pw_4, ff_pw_5, ff_pw_5.
 *
 * NOTE(review): the parameters are spelled uint8_t dst, uint8_t src in this
 * listing although the bodies use them as addresses (memory operands in the
 * asm, dst+8, src += 8*srcStride). The pointer declarators may have been
 * lost from the listing - confirm against the repository.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	347 #define QPEL_CAVS(OPNAME, OP, MMX)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	348 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	349 int h=8;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	350 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	351 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	352 "movq %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	353 "1: \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	354 "movq (%0), %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	355 "movq 1(%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	356 "movq %%mm0, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	357 "movq %%mm2, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	358 "punpcklbw %%mm7, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	359 "punpckhbw %%mm7, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	360 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	361 "punpckhbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	362 "paddw %%mm2, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	363 "paddw %%mm3, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	364 "pmullw %%mm6, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	365 "pmullw %%mm6, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	366 "movq -1(%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	367 "movq 2(%0), %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	368 "movq %%mm2, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	369 "movq %%mm4, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	370 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	371 "punpckhbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	372 "punpcklbw %%mm7, %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	373 "punpckhbw %%mm7, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	374 "paddw %%mm4, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	375 "paddw %%mm3, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	376 "psubw %%mm2, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	377 "psubw %%mm5, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	378 "movq %6, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	379 "paddw %%mm5, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	380 "paddw %%mm5, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	381 "psraw $3, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	382 "psraw $3, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	383 "packuswb %%mm1, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	384 OP(%%mm0, (%1),%%mm5, q) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	385 "add %3, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	386 "add %4, %1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	387 "decl %2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	388 " jnz 1b \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	389 : "+a"(src), "+c"(dst), "+m"(h)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	390 : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	391 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	392 );\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	393 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	394 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	395 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	396 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	397 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	398 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	399 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	400 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	401 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	402 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	403 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	404 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	405 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	406 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	407 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	408 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	409 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	410 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	411 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	412 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	413 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	414 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	415 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	416 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	417 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	418 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	419 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	420 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	421 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	422 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	423 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	424 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	425 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	426 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	427 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	428 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	429 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	430 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	431 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	432 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	433 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	434 src += 8*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	435 dst += 8*dstStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	436 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	437 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	438 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	439
/*
 * CAVS_MC(OPNAME, SIZE, MMX) defines four three-argument wrappers named
 * ff_ OPNAME cavs_qpel SIZE _mcXY_ MMX. Each one forwards to a
 * four-argument filter function and passes the single stride argument as
 * both the destination and the source stride:
 *
 *   mc20 -> horizontal filter (_h_)
 *   mc01 -> vertical filter _v1_
 *   mc02 -> vertical filter _v2_
 *   mc03 -> vertical filter _v3_
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	440 #define CAVS_MC(OPNAME, SIZE, MMX) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	441 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t dst, uint8_t src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	442 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	443 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	444 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	445 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t dst, uint8_t src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	446 OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	447 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	448 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	449 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t dst, uint8_t src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	450 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	451 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	452 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	453 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t dst, uint8_t src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	454 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	455 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	456
/*
 * Store macros used as the OP argument of the filter macros. Each expands
 * to asm text; a is the register holding the result, b the destination
 * operand, temp a scratch register and size the mov suffix (d or q).
 *
 * PUT_OP: plain store of a to b; temp is unused.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	457 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/*
 * AVG_3DNOW_OP: load b into temp, combine it with a using pavgusb, then
 * store a back to b.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	458 #define AVG_3DNOW_OP(a,b,temp, size) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	459 "mov" #size " " #b ", " #temp " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	460 "pavgusb " #temp ", " #a " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	461 "mov" #size " " #a ", " #b " \n\t"
/*
 * AVG_MMX2_OP: same sequence as AVG_3DNOW_OP, with pavgb as the averaging
 * instruction.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	462 #define AVG_MMX2_OP(a,b,temp, size) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	463 "mov" #size " " #b ", " #temp " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	464 "pavgb " #temp ", " #a " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	465 "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	466
/*
 * Instantiate the filter functions: put_ and avg_ variants for the 3dnow
 * and mmx2 suffixes. The put_ variants use PUT_OP for both suffixes; the
 * avg_ variants use the averaging store macro that matches the suffix.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	467 QPEL_CAVS(put_, PUT_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	468 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	469 QPEL_CAVS(put_, PUT_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	470 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	471
/*
 * Instantiate the mc20/mc01/mc02/mc03 wrappers for every combination of
 * put_/avg_, block size 8/16 and suffix 3dnow/mmx2.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	472 CAVS_MC(put_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	473 CAVS_MC(put_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	474 CAVS_MC(avg_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	475 CAVS_MC(avg_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	476 CAVS_MC(put_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	477 CAVS_MC(put_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	478 CAVS_MC(avg_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	479 CAVS_MC(avg_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	480
/*
 * Prototypes for the mc00 functions that the init functions below store in
 * slot 0 of the pixel tables. They are only declared here; their
 * definitions are not in this part of the file.
 * NOTE(review): what mc00 does (presumably a plain copy / average with no
 * filtering) cannot be seen from here - confirm at the definitions.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	481 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t dst, uint8_t src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	482 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t dst, uint8_t src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	483 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t dst, uint8_t src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	484 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t dst, uint8_t src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	485
/*
 * Install the mmx2 variants into the DSP context.
 *
 * c     context whose put_cavs_qpel_pixels_tab and avg_cavs_qpel_pixels_tab
 *       entries 0, 2, 4, 8 and 12 are overwritten, along with cavs_idct8_add
 * avctx not used in this function
 *
 * Table row 0 receives the 16-pixel functions and row 1 the 8-pixel ones.
 * Slots written per row: 0 = mc00, 2 = mc20, 4 = mc01, 8 = mc02, 12 = mc03.
 * All other slots are left untouched.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	486 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	487 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	488 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	489 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	490 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	491 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	492 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	493
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	494 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	495 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	496 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	497 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	498 #undef dspfunc
/* The inverse transform uses the same cavs_idct8_add_mmx as the 3dnow init. */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	499 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	500 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	501
/*
 * Install the 3dnow variants into the DSP context.
 *
 * c     context whose put_cavs_qpel_pixels_tab and avg_cavs_qpel_pixels_tab
 *       entries 0, 2, 4, 8 and 12 are overwritten, along with cavs_idct8_add
 * avctx not used in this function
 *
 * Table row 0 receives the 16-pixel functions and row 1 the 8-pixel ones.
 * Slots written per row: 0 = mc00, 2 = mc20, 4 = mc01, 8 = mc02, 12 = mc03.
 * All other slots are left untouched.
 *
 * NOTE(review): slot 0 is filled with the _mc00_mmx2 functions here as
 * well; no _mc00_3dnow counterpart is declared in this part of the file.
 * Whether those functions are safe on a CPU that has 3DNow! but not MMX2
 * cannot be determined from here - confirm at their definitions.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	502 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	503 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	504 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	505 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	506 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	507 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	508 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	509
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	510 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	511 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	512 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	513 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	514 #undef dspfunc
/* The inverse transform uses the same cavs_idct8_add_mmx as the mmx2 init. */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	515 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	516 }

Mercurial > libavcodec.hg

annotate i386/cavsdsp_mmx.c @ 3680:7690bafea6e0 libavcodec