libavcodec.hg: i386/cavsdsp

annotate i386/cavsdsp_mmx.c @ 6920:d02af7474bff libavcodec

Prevent 128*1<<trellis from becoming 0 and creating 0 sized arrays. fixes CID84 RUN2 CID85 RUN2 CID86 RUN2 CID87 RUN2 CID88 RUN2 CID89 RUN2 CID90 RUN2 CID91 RUN2 CID92 RUN2 CID93 RUN2 CID94 RUN2 CID95 RUN2 CID96 RUN2 CID97 RUN2 CID98 RUN2 CID99 RUN2 CID100 RUN2 CID101 RUN2 CID102 RUN2 CID103 RUN2 CID104 RUN2 CID105 RUN2 CID106 RUN2

author	michael
date	Wed, 28 May 2008 11:59:41 +0000
parents	f7cbb7733146
children	eebc7209c47f

rev	line source
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	1 /*
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	2 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	3 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	4 *
5967 15ed47af1838 Misc spelling fixes, prefer American over British English. diego parents: 5963 diff changeset	5 * MMX-optimized DSP functions, based on H.264 optimizations by
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	6 * Michael Niedermayer and Loren Merritt
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	7 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3524 diff changeset	8 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3524 diff changeset	9 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3524 diff changeset	10 * FFmpeg is free software; you can redistribute it and/or
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	11 * modify it under the terms of the GNU Lesser General Public
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	12 * License as published by the Free Software Foundation; either
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3524 diff changeset	13 * version 2.1 of the License, or (at your option) any later version.
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	14 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3524 diff changeset	15 * FFmpeg is distributed in the hope that it will be useful,
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	18 * Lesser General Public License for more details.
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	19 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	20 * You should have received a copy of the GNU Lesser General Public
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3524 diff changeset	21 * License along with FFmpeg; if not, write to the Free Software
5215 2b72f9bc4f06 license header consistency cosmetics diego parents: 5010 diff changeset	22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	23 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	24
6763 f7cbb7733146 Use full path for #includes from another directory. diego parents: 6755 diff changeset	25 #include "libavutil/common.h"
f7cbb7733146 Use full path for #includes from another directory. diego parents: 6755 diff changeset	26 #include "libavutil/x86_cpu.h"
f7cbb7733146 Use full path for #includes from another directory. diego parents: 6755 diff changeset	27 #include "libavcodec/dsputil.h"
5946 55251379b5b1 make ff_p* vars extern so that they can be used in various _mmx.c files aurel* parents: 5215 diff changeset	28 #include "dsputil_mmx.h"
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	29
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	30 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	31 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	32 * inverse transform
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	33 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	34 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	35
/*
 * One-dimensional 8-point CAVS inverse transform, four 16-bit lanes at once.
 *
 * block: base of the coefficients; eight quadwords are read at byte offsets
 *        0, 16, ..., 112 (src0..src7). Nothing is written back to memory.
 * bias:  rounding constant, added as packed words to both (src0 +/- src4) << 3
 *        terms, so it ends up in every output.
 *
 * The results are left, not yet shifted down, in the MMX registers:
 *   mm7 = dst0, mm5 = dst1, mm3 = dst2, mm1 = dst3,
 *   mm0 = dst4, mm2 = dst5, mm4 = dst6, mm6 = dst7
 * The asm statement declares no outputs or clobbers, so the caller's next
 * asm statement picks the values up straight from mm0-mm7.
 * NOTE(review): SUMSUB_BA is defined outside this excerpt; the sum/difference
 * roles in the comments below are taken from the original annotations.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	36 static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	37 {
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	38 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	39 "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	40 "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	41 "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	42 "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	43 "movq %%mm4, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	44 "movq %%mm5, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	45 "movq %%mm2, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	46 "movq %%mm7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	47
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	48 "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	49 "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	50 "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	51 "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	52 "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	53 "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	54 "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	55 "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	56 "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	57 "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	58 "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	59 "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	60
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	61 "movq %%mm5, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	62 "movq %%mm7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	63 "movq %%mm3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	64 "movq %%mm1, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	65 SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1 mm5 = a0 - a1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	66 "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	67 "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	68 "paddw %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	69 "paddw %%mm5, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	70 "paddw %%mm6, %%mm7 \n\t" /* mm7 = b4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	71 "paddw %%mm4, %%mm5 \n\t" /* mm5 = b5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	72
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	73 SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2 mm3 = a3 - a2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	74 "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	75 "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	76 "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	77 "paddw %%mm1, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	78 "paddw %%mm3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	79 "psubw %%mm2, %%mm1 \n\t" /* mm1 = b7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	80 "paddw %%mm0, %%mm3 \n\t" /* mm3 = b6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	81
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	82 "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	83 "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	84 "movq %%mm2, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	85 "movq %%mm6, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	86 "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	87 "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	88 "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	89 "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	90 "paddw %%mm2, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	91 "paddw %%mm0, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	92 "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	93 "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	94
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	95 "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	96 "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	97 SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0+src4 mm2 = src0-src4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	98 "psllw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	99 "psllw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	100 "paddw %1, %%mm0 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	101 "paddw %1, %%mm2 \n\t" /* add rounding bias */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	102
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	103 SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6 mm0 = a4 - a6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	104 SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7 mm2 = a5 - a7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	105 SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0 mm6 = dst7 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	106 SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	107 SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	108 SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	109 :: "r"(block), "m"(bias)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	110 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	111 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	112
/*
 * 8x8 CAVS inverse transform of `block`, added to the pixels at `dst`.
 *
 * dst:    destination pixels, handed on to add_pixels_clamped_mmx().
 * block:  64 int16_t coefficients (128 bytes); zeroed before returning.
 * stride: forwarded unchanged to add_pixels_clamped_mmx().
 *
 * Each pass calls cavs_idct8_1d() twice, once per group of four 16-bit
 * lanes. That helper leaves its eight result rows in mm0-mm7, and the asm
 * statement immediately after it consumes them from those registers.
 *   pass 1: bias ff_pw_4,  >> 3, 4x4 transposes, stored to b2 + 32*i
 *   pass 2: bias ff_pw_64, >> 7, stored in place to b2 + 4*i
 * NOTE(review): TRANSPOSE4, DECLARE_ALIGNED_8, ff_pw_4/ff_pw_64 and
 * add_pixels_clamped_mmx are defined outside this excerpt.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	113 static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	114 {
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	115 int i;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	116 DECLARE_ALIGNED_8(int16_t, b2[64]);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	117
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	118 for(i=0; i<2; i++){
    /* tmp: memory scratch slot used to park mm7 while it serves as the
     * temporary register of the first TRANSPOSE4 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	119 DECLARE_ALIGNED_8(uint64_t, tmp);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	120
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	121 cavs_idct8_1d(block+4*i, ff_pw_4);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	122
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	123 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	124 "psraw $3, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	125 "psraw $3, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	126 "psraw $3, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	127 "psraw $3, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	128 "psraw $3, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	129 "psraw $3, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	130 "psraw $3, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	131 "psraw $3, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	132 "movq %%mm7, %0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	133 TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	134 "movq %%mm0, 8(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	135 "movq %%mm6, 24(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	136 "movq %%mm7, 40(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	137 "movq %%mm4, 56(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	138 "movq %0, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	139 TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	140 "movq %%mm7, (%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	141 "movq %%mm1, 16(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	142 "movq %%mm0, 32(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	143 "movq %%mm3, 48(%1) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	144 : "=m"(tmp)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	145 : "r"(b2+32*i)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	146 : "memory"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	147 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	148 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	149
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	150 for(i=0; i<2; i++){
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	151 cavs_idct8_1d(b2+4*i, ff_pw_64);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	152
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	153 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	154 "psraw $7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	155 "psraw $7, %%mm6 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	156 "psraw $7, %%mm5 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	157 "psraw $7, %%mm4 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	158 "psraw $7, %%mm3 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	159 "psraw $7, %%mm2 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	160 "psraw $7, %%mm1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	161 "psraw $7, %%mm0 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	162 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	163 "movq %%mm5, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	164 "movq %%mm3, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	165 "movq %%mm1, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	166 "movq %%mm0, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	167 "movq %%mm2, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	168 "movq %%mm4, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	169 "movq %%mm6, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	170 :: "r"(b2+4*i)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	171 : "memory"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	172 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	173 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	174
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	175 add_pixels_clamped_mmx(b2, dst, stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	176
    /* Sixteen 8-byte stores of zero cover the whole 128-byte block. The
     * "memory" clobber tells the compiler that the asm writes through the
     * pointer, which it cannot deduce from a plain "r" input. */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	177 /* clear block */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	178 asm volatile(
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	179 "pxor %%mm7, %%mm7 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	180 "movq %%mm7, (%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	181 "movq %%mm7, 8(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	182 "movq %%mm7, 16(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	183 "movq %%mm7, 24(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	184 "movq %%mm7, 32(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	185 "movq %%mm7, 40(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	186 "movq %%mm7, 48(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	187 "movq %%mm7, 56(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	188 "movq %%mm7, 64(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	189 "movq %%mm7, 72(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	190 "movq %%mm7, 80(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	191 "movq %%mm7, 88(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	192 "movq %%mm7, 96(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	193 "movq %%mm7, 104(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	194 "movq %%mm7, 112(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	195 "movq %%mm7, 120(%0) \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	196 :: "r" (block) : "memory"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	197 );
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	198 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	199
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	200 /*****************************************************************************
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	201 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	202 * motion compensation
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	203 *
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	204 ****************************************************************************/
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	205
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	206 /* vertical filter [-1 -2 96 42 -7 0] */
/*
 * Asm fragment producing one output row of the vertical filter above.
 *
 * A..F name MMX registers for six consecutive input rows. F is loaded here
 * with movd from (%0) and widened from bytes to words; %0 is then advanced
 * by %2. F is not part of this row's sum (its tap is 0).
 *
 * The sum is built in mm6 as
 *     C*%5 + D*%6 - 7*E - 2*B - A + %4
 * then shifted right by 7, packed with unsigned saturation and passed to
 * OP() together with (%1) and A; finally %1 is advanced by %3.
 *
 * B and E are scaled in place (doubling / shift by 3) and shifted back
 * afterwards; that restore is exact only while the scaled value fits in a
 * signed 16-bit lane. mm7 is scratch and is left zeroed.
 *
 * NOTE(review): operands %2..%6 and OP belong to the enclosing asm statement,
 * which is outside this excerpt. From the tap list, %5 and %6 are presumably
 * 96 and 42 and %4 the rounding term -- confirm at the use site.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	207 #define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	208 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	209 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	210 "pmullw %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	211 "movq "#D", %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	212 "pmullw %6, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	213 "psllw $3, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	214 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	215 "psraw $3, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	216 "paddw %%mm7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	217 "paddw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	218 "paddw "#B", "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	219 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	220 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	221 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	222 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	223 "psraw $1, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	224 "psubw "#A", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	225 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	226 "psraw $7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	227 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	228 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	229 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	230
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	231 /* vertical filter [ 0 -1 5 5 -1 0] */
/*
 * Asm fragment producing one output row of the symmetric vertical filter
 * above.
 *
 * F is loaded with movd from (%0) and widened from bytes to words; %0 is
 * then advanced by %2. Neither A nor F enters this row's sum.
 *
 * The sum is built in mm6 as
 *     (C + D)*%5 - B - E + %4
 * then shifted right by 3, packed with unsigned saturation and passed to
 * OP() together with (%1) and A; finally %1 is advanced by %3.
 *
 * Unlike the other two variants this one never clears mm7, yet uses it as
 * the zero source for punpcklbw: mm7 must already be zero on entry.
 *
 * NOTE(review): operands %2..%5 and OP belong to the enclosing asm statement,
 * which is outside this excerpt. From the tap list, %5 is presumably 5 and
 * %4 the rounding term -- confirm at the use site.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	232 #define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	233 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	234 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	235 "paddw "#D", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	236 "pmullw %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	237 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	238 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	239 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	240 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	241 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	242 "psraw $3, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	243 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	244 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	245 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	246
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	247 /* vertical filter [ 0 -7 42 96 -2 -1] */
/*
 * Asm fragment producing one output row of the vertical filter above (the
 * mirror image of QPEL_CAVSV1's taps).
 *
 * F is loaded with movd from (%0), widened from bytes to words and, unlike
 * in the other variants, enters the sum; %0 is advanced by %2. A does not
 * enter the sum.
 *
 * The sum is built in mm6 as
 *     C*%6 + D*%5 - 7*B - 2*E - F + %4
 * (note that %5 and %6 swap roles relative to QPEL_CAVSV1), then shifted
 * right by 7, packed with unsigned saturation and passed to OP() together
 * with (%1) and A; finally %1 is advanced by %3.
 *
 * B and E are scaled in place (shift by 3 / doubling) and shifted back
 * afterwards; that restore is exact only while the scaled value fits in a
 * signed 16-bit lane. mm7 is scratch and is left zeroed.
 *
 * NOTE(review): operands %2..%6 and OP belong to the enclosing asm statement,
 * which is outside this excerpt. From the tap list, %5 and %6 are presumably
 * 96 and 42 and %4 the rounding term -- confirm at the use site.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	248 #define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	249 "movd (%0), "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	250 "movq "#C", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	251 "pmullw %6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	252 "movq "#D", %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	253 "pmullw %5, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	254 "psllw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	255 "psubw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	256 "psraw $3, "#B" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	257 "paddw %%mm7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	258 "paddw "#B", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	259 "paddw "#E", "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	260 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	261 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	262 "punpcklbw %%mm7, "#F" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	263 "psubw "#E", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	264 "psraw $1, "#E" \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	265 "psubw "#F", %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	266 "paddw %4, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	267 "psraw $7, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	268 "packuswb %%mm6, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	269 OP(%%mm6, (%1), A, d) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	270 "add %3, %1 \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	271
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	272
/*
 * QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2): statement body of a vertical-filter
 * function. It expands to statements, not to a function, so the expansion
 * site must have src, dst, srcStride, dstStride and h in scope.
 *
 *   VOP   per-row asm macro taking (A,B,C,D,E,F,OP)
 *   OP    store macro forwarded unchanged to VOP
 *   ADD   memory operand %4 of the asm statements
 *   MUL1  memory operand %5
 *   MUL2  memory operand %6
 *
 * Other asm operands: %0 = src, %1 = dst, %2 = srcStride, %3 = dstStride.
 *
 * src is first moved up by two rows. The loop then runs twice (w = 2); each
 * pass works on a 4-byte-wide column (rows are fetched with movd) and ends
 * by moving src and dst 4 bytes to the right, so one expansion covers
 * 8 bytes per row. Within a pass:
 *  - five rows are loaded into mm0..mm4 and widened to 16-bit words;
 *  - VOP is pasted 8 times, with the register roles rotated by one position
 *    each time, and 8 more times in a second asm statement when h == 16;
 *  - src is rewound by h+5 rows and dst by h rows.
 *
 * NOTE(review): the h == 16 asm statement starts from whatever the previous
 * asm statement left in mm0..mm5; neither statement lists MMX registers as
 * clobbers, so this relies on the compiler not using them in between.
 * NOTE(review): every h other than 16 executes only the first 8 VOP steps,
 * while the pointer rewind still uses h; callers visible here pass 8 or 16.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	273 #define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	274 int w= 2;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	275 src -= 2*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	276 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	277 while(w--){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	278 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	279 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	280 "movd (%0), %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	281 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	282 "movd (%0), %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	283 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	284 "movd (%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	285 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	286 "movd (%0), %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	287 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	288 "movd (%0), %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	289 "add %2, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	290 "punpcklbw %%mm7, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	291 "punpcklbw %%mm7, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	292 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	293 "punpcklbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	294 "punpcklbw %%mm7, %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	295 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	296 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	297 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	298 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	299 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	300 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	301 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	302 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	303 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	304 : "+a"(src), "+c"(dst)\
6755 33896780c612 Do not misuse long as the size of a register in x86. ramiro parents: 6135 diff changeset	305 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	306 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	307 );\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	308 if(h==16){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	309 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	310 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	311 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	312 VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	313 VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	314 VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	315 VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	316 VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	317 VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	318 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	319 : "+a"(src), "+c"(dst)\
6755 33896780c612 Do not misuse long as the size of a register in x86. ramiro parents: 6135 diff changeset	320 : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	321 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	322 );\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	323 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	324 src += 4-(h+5)*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	325 dst += 4-h*dstStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	326 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	327
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	328 #define QPEL_CAVS(OPNAME, OP, MMX)\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	329 static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	330 int h=8;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	331 asm volatile(\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	332 "pxor %%mm7, %%mm7 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	333 "movq %5, %%mm6 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	334 "1: \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	335 "movq (%0), %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	336 "movq 1(%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	337 "movq %%mm0, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	338 "movq %%mm2, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	339 "punpcklbw %%mm7, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	340 "punpckhbw %%mm7, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	341 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	342 "punpckhbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	343 "paddw %%mm2, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	344 "paddw %%mm3, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	345 "pmullw %%mm6, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	346 "pmullw %%mm6, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	347 "movq -1(%0), %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	348 "movq 2(%0), %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	349 "movq %%mm2, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	350 "movq %%mm4, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	351 "punpcklbw %%mm7, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	352 "punpckhbw %%mm7, %%mm3 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	353 "punpcklbw %%mm7, %%mm4 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	354 "punpckhbw %%mm7, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	355 "paddw %%mm4, %%mm2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	356 "paddw %%mm3, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	357 "psubw %%mm2, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	358 "psubw %%mm5, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	359 "movq %6, %%mm5 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	360 "paddw %%mm5, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	361 "paddw %%mm5, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	362 "psraw $3, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	363 "psraw $3, %%mm1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	364 "packuswb %%mm1, %%mm0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	365 OP(%%mm0, (%1),%%mm5, q) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	366 "add %3, %0 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	367 "add %4, %1 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	368 "decl %2 \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	369 " jnz 1b \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	370 : "+a"(src), "+c"(dst), "+m"(h)\
6755 33896780c612 Do not misuse long as the size of a register in x86. ramiro parents: 6135 diff changeset	371 : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
3524 419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	372 : "memory"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	373 );\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	374 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	375 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	376 static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	377 QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	378 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	379 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	380 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	381 QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	382 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	383 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	384 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride, int h){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	385 QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	386 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	387 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	388 static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	389 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	390 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	391 static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	392 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	393 OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	394 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	395 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	396 static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	397 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	398 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	399 static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	400 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	401 OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	402 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	403 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	404 static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	405 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	406 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	407 static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	408 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	409 OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	410 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	411 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	412 static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t dst, uint8_t src, int dstStride, int srcStride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	413 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	414 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	415 src += 8*srcStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	416 dst += 8*dstStride;\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	417 OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	418 OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	419 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	420
/*
 * CAVS_MC(OPNAME, SIZE, MMX): generate the ff_-prefixed entry points
 * ff_<OPNAME>cavs_qpel<SIZE>_mcXY_<MMX>(dst, src, stride) for one store
 * flavour, block size and instruction-set suffix.
 *
 * dst and src are byte pointers and are forwarded unchanged; the single
 * stride argument is passed as both dstStride and srcStride.
 *
 *   mc20 -> <OPNAME>cavs_qpel<SIZE>_h_<MMX>
 *   mc01 -> <OPNAME>cavs_qpel<SIZE>_v1_<MMX>
 *   mc02 -> <OPNAME>cavs_qpel<SIZE>_v2_<MMX>
 *   mc03 -> <OPNAME>cavs_qpel<SIZE>_v3_<MMX>
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	421 #define CAVS_MC(OPNAME, SIZE, MMX) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	422 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	423 OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	424 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	425 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	426 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	427 OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	428 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	429 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	430 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	431 OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	432 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	433 \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	434 static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	435 OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	436 }\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	437
/*
 * Store macros used as the OP argument of the filter macros. Each takes
 * (a, b, temp, size) and expands to asm text; size is the mov suffix
 * (callers in this file pass d or q).
 *
 * PUT_OP: a single "mov<size> a, b"; temp is not used.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	438 #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
/*
 * AVG_3DNOW_OP: load b into temp, combine temp into a with pavgusb, then
 * write a back to b.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	439 #define AVG_3DNOW_OP(a,b,temp, size) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	440 "mov" #size " " #b ", " #temp " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	441 "pavgusb " #temp ", " #a " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	442 "mov" #size " " #a ", " #b " \n\t"
/*
 * AVG_MMX2_OP: same three-instruction sequence as AVG_3DNOW_OP, but using
 * pavgb instead of pavgusb.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	443 #define AVG_MMX2_OP(a,b,temp, size) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	444 "mov" #size " " #b ", " #temp " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	445 "pavgb " #temp ", " #a " \n\t"\
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	446 "mov" #size " " #a ", " #b " \n\t"
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	447
/*
 * Instantiate the static filter helpers for every combination of store
 * flavour (put_ / avg_) and name suffix (3dnow / mmx2). Both put_ variants
 * use PUT_OP; the avg_ variants use the averaging macro matching the suffix.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	448 QPEL_CAVS(put_, PUT_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	449 QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	450 QPEL_CAVS(put_, PUT_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	451 QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	452
/*
 * Instantiate the ff_*_mc20/mc01/mc02/mc03 entry points for block sizes
 * 8 and 16, for put_ and avg_, once per name suffix (3dnow, mmx2).
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	453 CAVS_MC(put_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	454 CAVS_MC(put_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	455 CAVS_MC(avg_, 8, 3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	456 CAVS_MC(avg_, 16,3dnow)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	457 CAVS_MC(put_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	458 CAVS_MC(put_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	459 CAVS_MC(avg_, 8, mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	460 CAVS_MC(avg_, 16,mmx2)
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	461
/*
 * Prototypes of the mc00 entry points. They are not defined in this part of
 * the file; both init functions below store them in slot 0 of the pixel
 * tables. dst and src are byte pointers, as for the other mcXY entry points.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	462 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	463 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	464 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	465 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	466
/*
 * Install the mmx2-suffixed CAVS functions of this file into a DSPContext.
 *
 * c:     context whose put_/avg_cavs_qpel_pixels_tab entries and
 *        cavs_idct8_add pointer are overwritten.
 * avctx: not referenced in this function.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	467 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
/*
 * dspfunc(PFX, IDX, NUM): fill row IDX of c->PFX_pixels_tab with the
 * NUM-sized functions; slots 0, 2, 4, 8 and 12 receive mc00, mc20, mc01,
 * mc02 and mc03. Other slots are left untouched.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	468 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	469 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	470 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	471 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	472 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	473 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	474
/* Row 0 holds the 16-sized functions, row 1 the 8-sized ones. */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	475 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	476 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	477 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	478 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	479 #undef dspfunc
/* cavs_idct8_add_mmx is not defined in this part of the file. */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	480 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	481 }
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	482
/*
 * Install the 3dnow-suffixed CAVS functions of this file into a DSPContext.
 *
 * c:     context whose put_/avg_cavs_qpel_pixels_tab entries and
 *        cavs_idct8_add pointer are overwritten.
 * avctx: not referenced in this function.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	483 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
/*
 * dspfunc(PFX, IDX, NUM): fill row IDX of c->PFX_pixels_tab with the
 * NUM-sized functions; slots 0, 2, 4, 8 and 12 receive mc00, mc20, mc01,
 * mc02 and mc03.
 * NOTE(review): slot 0 uses the _mc00_mmx2 symbol even in this 3DNow! init;
 * no _mc00_3dnow function is declared in this part of the file. Presumably
 * the mc00 routines need no MMX2-only instruction - confirm against their
 * definition.
 */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	484 #define dspfunc(PFX, IDX, NUM) \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	485 c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	486 c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	487 c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	488 c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	489 c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	490
/* Row 0 holds the 16-sized functions, row 1 the 8-sized ones. */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	491 dspfunc(put_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	492 dspfunc(put_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	493 dspfunc(avg_cavs_qpel, 0, 16);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	494 dspfunc(avg_cavs_qpel, 1, 8);
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	495 #undef dspfunc
/* The IDCT pointer is the same MMX routine the MMX2 init installs. */
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	496 c->cavs_idct8_add = cavs_idct8_add_mmx;
419409926166 some MMX optimizations for the CAVS decoder stefang parents: diff changeset	497 }

Mercurial > libavcodec.hg

annotate i386/cavsdsp_mmx.c @ 6920:d02af7474bff libavcodec