mplayer.hg: libmpeg2/idct

annotate libmpeg2/idct_mmx.c @ 8219:8f6548a70cda

sync by hephooey@fastmail.fm

author	jaf
date	Sun, 17 Nov 2002 12:31:02 +0000
parents	4fa90be8da03
children	47984e3f54ce

rev	line source
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	1 /*
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	2 * idct_mmx.c
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	3 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	4 *
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	5 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	6 *
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	7 * mpeg2dec is free software; you can redistribute it and/or modify
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	8 * it under the terms of the GNU General Public License as published by
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	9 * the Free Software Foundation; either version 2 of the License, or
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	10 * (at your option) any later version.
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	11 *
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	12 * mpeg2dec is distributed in the hope that it will be useful,
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	15 * GNU General Public License for more details.
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	16 *
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	17 * You should have received a copy of the GNU General Public License
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	18 * along with this program; if not, write to the Free Software
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	20 */
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	21
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	22 #include "config.h"
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	23
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	24 #ifdef ARCH_X86
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	25
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	26 #include <inttypes.h>
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	27
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	28 #include "mpeg2_internal.h"
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	29 #include "attributes.h"
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	30 #include "mmx.h"
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	31
/*
 * Fixed-point scaling of the row pass: the row results are shifted right
 * by ROW_SHIFT bits (see the psrad_i2r (ROW_SHIFT, ...) uses and the
 * ">> ROW_SHIFT" in the reference idct_row).
 */
#define ROW_SHIFT 11
/* NOTE(review): COL_SHIFT is not referenced in the row code; presumably it
 * is the matching shift of the column pass - confirm against idct_col. */
#define COL_SHIFT 6

/*
 * round (bias):   (bias + 0.5) scaled by 2^ROW_SHIFT and truncated to int,
 *                 i.e. the value added before the >> ROW_SHIFT so that the
 *                 shift rounds instead of truncating.
 * rounder (bias): brace initializer holding that value twice, one copy per
 *                 32-bit lane of the operand the row code adds with paddd.
 *
 * Note that this macro hides the C library round() for the rest of the file.
 */
#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
#define rounder(bias) {round (bias), round (bias)}
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	37
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	38
#if 0
/* C row IDCT - it's just here to document the MMXEXT and MMX versions */

/*
 * Reference 8-point row transform, compiled out.
 *
 *   row     - coefficient block; the row processed starts at row[offset]
 *   offset  - index of the first element of the row inside the block
 *   table   - cosine constants, read as table[1]..table[7] (C1..C7);
 *             table[0] is not read here
 *   rounder - pointer to the rounding bias added to every even-part sum
 *
 * The eight inputs are overwritten in place with the eight outputs, each
 * shifted right by ROW_SHIFT.
 */
static inline void idct_row (int16_t * row, int offset,
                             int16_t * table, int32_t * rounder)
{
    int C1, C2, C3, C4, C5, C6, C7;
    int a0, a1, a2, a3, b0, b1, b2, b3;

    row += offset;

    C1 = table[1];
    C2 = table[2];
    C3 = table[3];
    C4 = table[4];
    C5 = table[5];
    C6 = table[6];
    C7 = table[7];

    /* even part: built from row[0], row[2], row[4], row[6] plus the bias */
    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;

    /* odd part: built from row[1], row[3], row[5], row[7] */
    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];

    /* butterfly: outputs k and 7-k are the sum and difference of a_k, b_k */
    row[0] = (a0 + b0) >> ROW_SHIFT;
    row[1] = (a1 + b1) >> ROW_SHIFT;
    row[2] = (a2 + b2) >> ROW_SHIFT;
    row[3] = (a3 + b3) >> ROW_SHIFT;
    row[4] = (a3 - b3) >> ROW_SHIFT;
    row[5] = (a2 - b2) >> ROW_SHIFT;
    row[6] = (a1 - b1) >> ROW_SHIFT;
    row[7] = (a0 - b0) >> ROW_SHIFT;
}
#endif
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	77
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	78
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	79 /* MMXEXT row IDCT */
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	80
/*
 * Initializer for the 32-entry int16_t constant table of the MMXEXT row
 * pass.  Each line below is one 64-bit quadword (lowest word first) and is
 * read by the row code at table+0, +4, +8, ... +28; the order matches the
 * register layouts noted in the comments of mmxext_row_head / mmxext_row.
 * Negated arguments are parenthesised so that expression arguments expand
 * correctly.
 */
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)      {  c4,    c2,   -(c4), -(c2),   \
                                                   c4,    c6,    c4,    c6,     \
                                                   c1,    c3,   -(c1), -(c5),   \
                                                   c5,    c7,    c3,   -(c7),   \
                                                   c4,   -(c6),  c4,   -(c6),   \
                                                  -(c4),  c2,    c4,   -(c2),   \
                                                   c5,   -(c1),  c3,   -(c1),   \
                                                   c7,    c3,    c7,   -(c5) }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	89
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	90 static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	91 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	92 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	93
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	94 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	95 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	96
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	97 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	98 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	99
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	100 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	101 pmaddwd_r2r (mm0, mm3); // mm3 = -C4x4-C2x6 C4x0+C2x2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	102
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	103 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	104 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	105
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	106 static inline void mmxext_row (int16_t * table, int32_t * rounder)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	107 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	108 movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	109 pmaddwd_r2r (mm2, mm4); // mm4 = C4x0+C6x2 C4x4+C6x6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	110
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	111 pmaddwd_m2r ((table+16), mm0); // mm0 = C4x4-C6x6 C4x0-C6*x2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	112 pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	113
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	114 movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	115 pmaddwd_r2r (mm5, mm1); // mm1 = -C1x5-C5x7 C1x1+C3x3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	116
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	117 paddd_m2r (*rounder, mm3); // mm3 += rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	118 pmaddwd_r2r (mm6, mm7); // mm7 = C3x1-C7x3 C5x5+C7x7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	119
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	120 pmaddwd_m2r ((table+20), mm2); // mm2 = C4x0-C2x2 -C4x4+C2*x6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	121 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	122
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	123 pmaddwd_m2r ((table+24), mm5); // mm5 = C3x5-C1x7 C5x1-C1*x3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	124 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	125
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	126 pmaddwd_m2r ((table+28), mm6); // mm6 = C7x1-C5x3 C7x5+C3*x7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	127 paddd_r2r (mm7, mm1); // mm1 = b1 b0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	128
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	129 paddd_m2r (*rounder, mm0); // mm0 += rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	130 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	131
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	132 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	133 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	134
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	135 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	136 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	137
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	138 paddd_r2r (mm6, mm5); // mm5 = b3 b2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	139 movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	140
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	141 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	142 psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	143 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	144
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	145 static inline void mmxext_row_tail (int16_t * row, int store)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	146 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	147 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	148
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	149 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	150
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	151 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	152
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	153 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	154
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	155 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	156 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	157
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	158 /* slot */
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	159
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	160 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	161 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	162
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	163 static inline void mmxext_row_mid (int16_t * row, int store,
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	164 int offset, int16_t * table)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	165 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	166 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	167 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	168
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	169 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	170 psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	171
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	172 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	173 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	174
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	175 packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	176 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	177
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	178 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	179 pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	180
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	181 movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	182 movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	183
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	184 pmaddwd_r2r (mm0, mm3); // mm3 = -C4x4-C2x6 C4x0+C2x2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	185
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	186 movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	187 pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	188 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	189
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	190
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	191 /* MMX row IDCT */
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	192
/*
 * Initializer for the 32-entry int16_t constant table of the plain-MMX row
 * pass.  Each line below is one 64-bit quadword (lowest word first) and is
 * read by the row code at table+0, +4, +8, ... +28; the order matches the
 * register layouts noted in the comments of mmx_row_head / mmx_row and
 * differs from mmxext_table.  Negated arguments are parenthesised so that
 * expression arguments expand correctly.
 */
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) {  c4,    c2,    c4,    c6,     \
                                           c4,    c6,   -(c4), -(c2),   \
                                           c1,    c3,    c3,   -(c7),   \
                                           c5,    c7,   -(c1), -(c5),   \
                                           c4,   -(c6),  c4,   -(c2),   \
                                          -(c4),  c2,    c4,   -(c6),   \
                                           c5,   -(c1),  c7,   -(c5),   \
                                           c7,    c3,    c3,   -(c1) }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	201
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	202 static inline void mmx_row_head (int16_t * row, int offset, int16_t * table)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	203 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	204 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	205
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	206 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	207 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	208
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	209 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	210 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	211
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	212 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	213
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	214 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	215 pmaddwd_r2r (mm0, mm3); // mm3 = C4x0+C6x2 C4x0+C2x2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	216
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	217 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	218 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	219 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	220
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	221 static inline void mmx_row (int16_t * table, int32_t * rounder)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	222 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	223 pmaddwd_r2r (mm2, mm4); // mm4 = -C4x4-C2x6 C4x4+C6x6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	224 punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	225
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	226 pmaddwd_m2r ((table+16), mm0); // mm0 = C4x0-C2x2 C4x0-C6*x2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	227 punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	228
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	229 movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	230 pmaddwd_r2r (mm5, mm1); // mm1 = C3x1-C7x3 C1x1+C3x3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	231
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	232 paddd_m2r (*rounder, mm3); // mm3 += rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	233 pmaddwd_r2r (mm6, mm7); // mm7 = -C1x5-C5x7 C5x5+C7x7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	234
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	235 pmaddwd_m2r ((table+20), mm2); // mm2 = C4x4-C6x6 -C4x4+C2*x6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	236 paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	237
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	238 pmaddwd_m2r ((table+24), mm5); // mm5 = C7x1-C5x3 C5x1-C1*x3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	239 movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	240
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	241 pmaddwd_m2r ((table+28), mm6); // mm6 = C3x5-C1x7 C7x5+C3*x7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	242 paddd_r2r (mm7, mm1); // mm1 = b1 b0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	243
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	244 paddd_m2r (*rounder, mm0); // mm0 += rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	245 psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	246
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	247 psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	248 paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	249
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	250 paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	251 psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	252
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	253 paddd_r2r (mm6, mm5); // mm5 = b3 b2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	254 movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	255
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	256 paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	257 psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	258 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	259
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	260 static inline void mmx_row_tail (int16_t * row, int store)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	261 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	262 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	263
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	264 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	265
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	266 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	267
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	268 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	269
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	270 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	271 movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	272
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	273 pslld_i2r (16, mm7); // mm7 = y7 0 y5 0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	274
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	275 psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	276
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	277 por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	278
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	279 /* slot */
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	280
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	281 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	282 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	283
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	284 static inline void mmx_row_mid (int16_t * row, int store,
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	285 int offset, int16_t * table)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	286 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	287 movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	288 psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	289
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	290 movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	291 psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	292
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	293 packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	294 movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	295
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	296 packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	297 movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	298
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	299 movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	300 movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	301
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	302 punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	303 psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	304
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	305 movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	306 pslld_i2r (16, mm1); // mm1 = y7 0 y5 0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	307
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	308 movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	309 por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	310
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	311 movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	312 punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	313
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	314 movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	315 pmaddwd_r2r (mm0, mm3); // mm3 = C4x0+C6x2 C4x0+C2x2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	316 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	317
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	318
#if 0
/*
 * Reference C column IDCT.  Compiled out: it exists only to spell out, in
 * scalar form, the arithmetic that the MMX/MMXEXT column pass performs.
 * The variable names (x*, u*, v*, a*, b*, y*) are the ones used in the
 * per-instruction comments of the MMX version.
 *
 * col    - base of the coefficient block (rows are 8 int16_t apart)
 * offset - index of the column to transform
 * The column is transformed in place.
 */
static inline void idct_col (int16_t * col, int offset)
{
/* fixed-point multiply keeping the high 16 bits - as implemented on mmx */
#define F(c,x) (((c) * (x)) >> 16)

/* clamp to the int16_t range - it helps us handle torture test cases */
#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))

    int16_t * const p = col + offset;

    /* fetch the eight inputs of this column */
    const int16_t x0 = p[0*8];
    const int16_t x1 = p[1*8];
    const int16_t x2 = p[2*8];
    const int16_t x3 = p[3*8];
    const int16_t x4 = p[4*8];
    const int16_t x5 = p[5*8];
    const int16_t x6 = p[6*8];
    const int16_t x7 = p[7*8];

    /* even half: inputs 0, 4, 2, 6 */
    const int16_t u04 = S (x0 + x4);
    const int16_t v04 = S (x0 - x4);
    const int16_t u26 = S (F (T2, x6) + x2);
    const int16_t v26 = S (F (T2, x2) - x6);

    const int16_t a0 = S (u04 + u26);
    const int16_t a1 = S (v04 + v26);
    const int16_t a2 = S (v04 - v26);
    const int16_t a3 = S (u04 - u26);

    /* odd half: inputs 1, 7, 3, 5 */
    const int16_t u17 = S (F (T1, x7) + x1);
    const int16_t v17 = S (F (T1, x1) - x7);
    const int16_t u35 = S (F (T3, x5) + x3);
    const int16_t v35 = S (F (T3, x3) - x5);

    const int16_t b0 = S (u17 + u35);
    const int16_t b3 = S (v17 - v35);

    /* u12/v12 are first formed as a sum/difference, then scaled by C4 */
    int16_t u12 = S (u17 - u35);
    int16_t v12 = S (v17 + v35);
    u12 = S (2 * F (C4, u12));
    v12 = S (2 * F (C4, v12));

    const int16_t b1 = S (u12 + v12);
    const int16_t b2 = S (u12 - v12);

    /* butterfly the two halves and scale down */
    const int16_t y0 = S (a0 + b0) >> COL_SHIFT;
    const int16_t y1 = S (a1 + b1) >> COL_SHIFT;
    const int16_t y2 = S (a2 + b2) >> COL_SHIFT;
    const int16_t y3 = S (a3 + b3) >> COL_SHIFT;

    const int16_t y4 = S (a3 - b3) >> COL_SHIFT;
    const int16_t y5 = S (a2 - b2) >> COL_SHIFT;
    const int16_t y6 = S (a1 - b1) >> COL_SHIFT;
    const int16_t y7 = S (a0 - b0) >> COL_SHIFT;

    /* write the column back in place */
    p[0*8] = y0;
    p[1*8] = y1;
    p[2*8] = y2;
    p[3*8] = y3;
    p[4*8] = y4;
    p[5*8] = y5;
    p[6*8] = y6;
    p[7*8] = y7;
}
#endif
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	389
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	390
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	391 // MMX column IDCT
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	392 static inline void idct_col (int16_t * col, int offset)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	393 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	394 #define T1 13036
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	395 #define T2 27146
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	396 #define T3 43790
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	397 #define C4 23170
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	398
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	399 static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	400 static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	401 static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	402 static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	403
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	404 /* column code adapted from peter gubanov */
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	405 /* http://www.elecard.com/peter/idct.shtml */
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	406
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	407 movq_m2r (*_T1, mm0); // mm0 = T1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	408
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	409 movq_m2r ((col+offset+18), mm1); // mm1 = x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	410 movq_r2r (mm0, mm2); // mm2 = T1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	411
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	412 movq_m2r ((col+offset+78), mm4); // mm4 = x7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	413 pmulhw_r2r (mm1, mm0); // mm0 = T1*x1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	414
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	415 movq_m2r (*_T3, mm5); // mm5 = T3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	416 pmulhw_r2r (mm4, mm2); // mm2 = T1*x7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	417
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	418 movq_m2r ((col+offset+58), mm6); // mm6 = x5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	419 movq_r2r (mm5, mm7); // mm7 = T3-1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	420
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	421 movq_m2r ((col+offset+38), mm3); // mm3 = x3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	422 psubsw_r2r (mm4, mm0); // mm0 = v17
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	423
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	424 movq_m2r (*_T2, mm4); // mm4 = T2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	425 pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	426
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	427 paddsw_r2r (mm2, mm1); // mm1 = u17
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	428 pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	429
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	430 /* slot */
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	431
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	432 movq_r2r (mm4, mm2); // mm2 = T2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	433 paddsw_r2r (mm3, mm5); // mm5 = T3*x3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	434
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	435 pmulhw_m2r ((col+offset+28), mm4);// mm4 = T2*x2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	436 paddsw_r2r (mm6, mm7); // mm7 = T3*x5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	437
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	438 psubsw_r2r (mm6, mm5); // mm5 = v35
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	439 paddsw_r2r (mm3, mm7); // mm7 = u35
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	440
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	441 movq_m2r ((col+offset+68), mm3); // mm3 = x6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	442 movq_r2r (mm0, mm6); // mm6 = v17
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	443
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	444 pmulhw_r2r (mm3, mm2); // mm2 = T2*x6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	445 psubsw_r2r (mm5, mm0); // mm0 = b3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	446
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	447 psubsw_r2r (mm3, mm4); // mm4 = v26
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	448 paddsw_r2r (mm6, mm5); // mm5 = v12
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	449
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	450 movq_r2m (mm0, (col+offset+38)); // save b3 in scratch0
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	451 movq_r2r (mm1, mm6); // mm6 = u17
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	452
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	453 paddsw_m2r ((col+offset+28), mm2);// mm2 = u26
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	454 paddsw_r2r (mm7, mm6); // mm6 = b0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	455
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	456 psubsw_r2r (mm7, mm1); // mm1 = u12
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	457 movq_r2r (mm1, mm7); // mm7 = u12
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	458
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	459 movq_m2r ((col+offset+08), mm3); // mm3 = x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	460 paddsw_r2r (mm5, mm1); // mm1 = u12+v12
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	461
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	462 movq_m2r (*_C4, mm0); // mm0 = C4/2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	463 psubsw_r2r (mm5, mm7); // mm7 = u12-v12
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	464
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	465 movq_r2m (mm6, (col+offset+58)); // save b0 in scratch1
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	466 pmulhw_r2r (mm0, mm1); // mm1 = b1/2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	467
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	468 movq_r2r (mm4, mm6); // mm6 = v26
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	469 pmulhw_r2r (mm0, mm7); // mm7 = b2/2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	470
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	471 movq_m2r ((col+offset+48), mm5); // mm5 = x4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	472 movq_r2r (mm3, mm0); // mm0 = x0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	473
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	474 psubsw_r2r (mm5, mm3); // mm3 = v04
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	475 paddsw_r2r (mm5, mm0); // mm0 = u04
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	476
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	477 paddsw_r2r (mm3, mm4); // mm4 = a1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	478 movq_r2r (mm0, mm5); // mm5 = u04
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	479
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	480 psubsw_r2r (mm6, mm3); // mm3 = a2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	481 paddsw_r2r (mm2, mm5); // mm5 = a0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	482
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	483 paddsw_r2r (mm1, mm1); // mm1 = b1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	484 psubsw_r2r (mm2, mm0); // mm0 = a3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	485
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	486 paddsw_r2r (mm7, mm7); // mm7 = b2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	487 movq_r2r (mm3, mm2); // mm2 = a2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	488
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	489 movq_r2r (mm4, mm6); // mm6 = a1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	490 paddsw_r2r (mm7, mm3); // mm3 = a2+b2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	491
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	492 psraw_i2r (COL_SHIFT, mm3); // mm3 = y2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	493 paddsw_r2r (mm1, mm4); // mm4 = a1+b1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	494
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	495 psraw_i2r (COL_SHIFT, mm4); // mm4 = y1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	496 psubsw_r2r (mm1, mm6); // mm6 = a1-b1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	497
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	498 movq_m2r ((col+offset+58), mm1); // mm1 = b0
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	499 psubsw_r2r (mm7, mm2); // mm2 = a2-b2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	500
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	501 psraw_i2r (COL_SHIFT, mm6); // mm6 = y6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	502 movq_r2r (mm5, mm7); // mm7 = a0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	503
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	504 movq_r2m (mm4, (col+offset+18)); // save y1
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	505 psraw_i2r (COL_SHIFT, mm2); // mm2 = y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	506
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	507 movq_r2m (mm3, (col+offset+28)); // save y2
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	508 paddsw_r2r (mm1, mm5); // mm5 = a0+b0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	509
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	510 movq_m2r ((col+offset+38), mm4); // mm4 = b3
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	511 psubsw_r2r (mm1, mm7); // mm7 = a0-b0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	512
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	513 psraw_i2r (COL_SHIFT, mm5); // mm5 = y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	514 movq_r2r (mm0, mm3); // mm3 = a3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	515
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	516 movq_r2m (mm2, (col+offset+58)); // save y5
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	517 psubsw_r2r (mm4, mm3); // mm3 = a3-b3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	518
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	519 psraw_i2r (COL_SHIFT, mm7); // mm7 = y7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	520 paddsw_r2r (mm0, mm4); // mm4 = a3+b3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	521
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	522 movq_r2m (mm5, (col+offset+08)); // save y0
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	523 psraw_i2r (COL_SHIFT, mm3); // mm3 = y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	524
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	525 movq_r2m (mm6, (col+offset+68)); // save y6
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	526 psraw_i2r (COL_SHIFT, mm4); // mm4 = y3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	527
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	528 movq_r2m (mm7, (col+offset+78)); // save y7
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	529
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	530 movq_r2m (mm3, (col+offset+48)); // save y4
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	531
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	532 movq_r2m (mm4, (col+offset+38)); // save y3
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	533 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	534
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	535
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	536 static int32_t rounder0[] ATTR_ALIGN(8) =
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	537 rounder ((1 << (COL_SHIFT - 1)) - 0.5);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	538 static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	539 static int32_t rounder1[] ATTR_ALIGN(8) =
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	540 rounder (1.25683487303); /* C1(C1/C4+C1+C7)/2 /
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	541 static int32_t rounder7[] ATTR_ALIGN(8) =
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	542 rounder (-0.25); /* C1(C7/C4+C7-C1)/2 /
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	543 static int32_t rounder2[] ATTR_ALIGN(8) =
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	544 rounder (0.60355339059); /* C2 * (C6+C2)/2 */
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	545 static int32_t rounder6[] ATTR_ALIGN(8) =
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	546 rounder (-0.25); /* C2 * (C6-C2)/2 */
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	547 static int32_t rounder3[] ATTR_ALIGN(8) =
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	548 rounder (0.087788325588); /* C3(-C3/C4+C3+C5)/2 /
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	549 static int32_t rounder5[] ATTR_ALIGN(8) =
36 846535ace7a2 libmpeg2-0.2.0 merge arpi_esp parents: 1 diff changeset	550 rounder (-0.441341716183); /* C3(-C5/C4+C5-C3)/2 /
1 3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	551
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	552
/*
 * declare_idct - stamp out a complete 8x8 in-place IDCT function.
 *
 *   idct           name of the function to define
 *   table          macro that builds a row coefficient table from seven
 *                  constants
 *   idct_row_head  primitive that starts the row pass on the first row
 *   idct_row       primitive that transforms the current row using a
 *                  table and a rounder
 *   idct_row_mid   primitive that takes the offset of the row just done,
 *                  the offset of the next row and the next row's table
 *   idct_row_tail  primitive that finishes the row pass on the last row
 *
 * Rows are visited in the order 0, 4, 1, 7, 2, 6, 3, 5 so that the two
 * rows sharing a coefficient table (table04, table17, table26, table35)
 * are processed back to back.  The column pass then runs idct_col twice,
 * at offsets 0 and 4.
 */
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid)	\
inline void idct (int16_t * block)					\
{									\
    static int16_t table04[] ATTR_ALIGN(16) =				\
	table (22725, 21407, 19266, 16384, 12873,  8867, 4520);		\
    static int16_t table17[] ATTR_ALIGN(16) =				\
	table (31521, 29692, 26722, 22725, 17855, 12299, 6270);		\
    static int16_t table26[] ATTR_ALIGN(16) =				\
	table (29692, 27969, 25172, 21407, 16819, 11585, 5906);		\
    static int16_t table35[] ATTR_ALIGN(16) =				\
	table (26722, 25172, 22654, 19266, 15137, 10426, 5315);		\
									\
    idct_row_head (block, 0*8, table04);				\
    idct_row (table04, rounder0);					\
    idct_row_mid (block, 0*8, 4*8, table04);				\
    idct_row (table04, rounder4);					\
    idct_row_mid (block, 4*8, 1*8, table17);				\
    idct_row (table17, rounder1);					\
    idct_row_mid (block, 1*8, 7*8, table17);				\
    idct_row (table17, rounder7);					\
    idct_row_mid (block, 7*8, 2*8, table26);				\
    idct_row (table26, rounder2);					\
    idct_row_mid (block, 2*8, 6*8, table26);				\
    idct_row (table26, rounder6);					\
    idct_row_mid (block, 6*8, 3*8, table35);				\
    idct_row (table35, rounder3);					\
    idct_row_mid (block, 3*8, 5*8, table35);				\
    idct_row (table35, rounder5);					\
    idct_row_tail (block, 5*8);						\
									\
    idct_col (block, 0);						\
    idct_col (block, 4);						\
}
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	586
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	587
/*
 * COPY_MMX - one step of the row copy pipeline used by block_copy.
 *
 *   row_off   offset (in int16_t units) of the row to load from block
 *   lo, hi    registers that receive the row's first and second four
 *             coefficients; on exit lo holds the row packed to bytes
 *   pending   register holding the previously packed row, which is
 *             stored to the next destination line
 *
 * Uses block, dest and stride from the enclosing scope and advances dest
 * by stride.
 */
#define COPY_MMX(row_off,lo,hi,pending)		\
do {						\
    movq_m2r (*(block+row_off), lo);		\
    dest += stride;				\
    movq_m2r (*(block+row_off+4), hi);		\
    movq_r2m (pending, *dest);			\
    packuswb_r2r (hi, lo);			\
} while (0)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	596
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	597 static void block_copy (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	598 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	599 movq_m2r ((block+08), mm0);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	600 movq_m2r ((block+08+4), mm1);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	601 movq_m2r ((block+18), mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	602 packuswb_r2r (mm1, mm0);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	603 movq_m2r ((block+18+4), mm3);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	604 movq_r2m (mm0, *dest);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	605 packuswb_r2r (mm3, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	606 COPY_MMX (2*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	607 COPY_MMX (3*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	608 COPY_MMX (4*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	609 COPY_MMX (5*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	610 COPY_MMX (6*8, mm0, mm1, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	611 COPY_MMX (7*8, mm2, mm3, mm0);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	612 movq_r2m (mm2, *(dest+stride));
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	613 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	614
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	615
/*
 * ADD_MMX - one step of the row add pipeline used by block_add.
 *
 *   offset   offset (in int16_t units) of the block row to add
 *   r1, r2   registers that receive the destination line two lines
 *            ahead of the current dest, widened to 16 bits (low / high
 *            half) and summed with the block row using signed saturation
 *   r3, r4   registers holding the previous row's sums; they are packed
 *            to bytes with unsigned saturation and stored to the next
 *            line
 *
 * Expects mm0 to be zero (it is the zero source for the byte->word
 * unpacks).  Uses block, dest and stride from the enclosing scope and
 * advances dest by stride.
 */
#define ADD_MMX(offset,r1,r2,r3,r4)		\
do {						\
    movq_m2r (*(dest+2*stride), r1);		\
    packuswb_r2r (r4, r3);			\
    movq_r2r (r1, r2);				\
    dest += stride;				\
    movq_r2m (r3, *dest);			\
    punpcklbw_r2r (mm0, r1);			\
    paddsw_m2r (*(block+offset), r1);		\
    punpckhbw_r2r (mm0, r2);			\
    paddsw_m2r (*(block+offset+4), r2);		\
} while (0)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	628
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	629 static void block_add (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	630 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	631 movq_m2r (*dest, mm1);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	632 pxor_r2r (mm0, mm0);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	633 movq_m2r (*(dest+stride), mm3);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	634 movq_r2r (mm1, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	635 punpcklbw_r2r (mm0, mm1);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	636 movq_r2r (mm3, mm4);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	637 paddsw_m2r ((block+08), mm1);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	638 punpckhbw_r2r (mm0, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	639 paddsw_m2r ((block+08+4), mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	640 punpcklbw_r2r (mm0, mm3);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	641 paddsw_m2r ((block+18), mm3);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	642 packuswb_r2r (mm2, mm1);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	643 punpckhbw_r2r (mm0, mm4);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	644 movq_r2m (mm1, *dest);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	645 paddsw_m2r ((block+18+4), mm4);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	646 ADD_MMX (2*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	647 ADD_MMX (3*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	648 ADD_MMX (4*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	649 ADD_MMX (5*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	650 ADD_MMX (6*8, mm1, mm2, mm3, mm4);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	651 ADD_MMX (7*8, mm3, mm4, mm1, mm2);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	652 packuswb_r2r (mm4, mm3);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	653 movq_r2m (mm3, *(dest+stride));
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	654 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	655
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	656
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	657 declare_idct (mmxext_idct, mmxext_table,
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	658 mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	659
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	660 void idct_block_copy_mmxext (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	661 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	662 mmxext_idct (block);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	663 block_copy (block, dest, stride);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	664 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	665
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	666 void idct_block_add_mmxext (int16_t * block, uint8_t * dest, int stride)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	667 {
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	668 mmxext_idct (block);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	669 block_add (block, dest, stride);
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	670 }
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	671
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	672
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	673 declare_idct (mmx_idct, mmx_table,
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	674 mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	675
/*
 * Plain-MMX "copy" entry point.
 *
 * Calls mmx_idct on the coefficient block, then passes the same block,
 * together with dest and stride, to block_copy.
 *
 * block  - 16-bit coefficient buffer, handed to mmx_idct first
 * dest   - output pointer forwarded unchanged to block_copy
 * stride - forwarded unchanged to block_copy (presumably the distance in
 *          bytes between destination rows -- confirm against block_copy)
 */
void idct_block_copy_mmx (int16_t * block, uint8_t * dest, int stride)
{
    /* Transform first; block_copy then consumes the transformed block. */
    mmx_idct (block);
    block_copy (block, dest, stride);
}
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	681
/*
 * Plain-MMX "add" entry point.
 *
 * Calls mmx_idct on the coefficient block, then passes the same block,
 * together with dest and stride, to block_add.
 *
 * block  - 16-bit coefficient buffer, handed to mmx_idct first
 * dest   - pointer forwarded unchanged to block_add
 * stride - forwarded unchanged to block_add (presumably the distance in
 *          bytes between destination rows -- confirm against block_add)
 */
void idct_block_add_mmx (int16_t * block, uint8_t * dest, int stride)
{
    /* Transform first; block_add then consumes the transformed block. */
    mmx_idct (block);
    block_add (block, dest, stride);
}
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	687
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	688
/*
 * Remap one scan-table entry to the coefficient order used by the
 * mmx/mmxext idct: bits 3..5 are kept as they are, and the low three bits
 * are rotated right by one (bits 2..1 move down to bits 1..0, bit 0 moves
 * up to bit 2).  Bits 6 and 7 of the input are discarded.
 */
static uint8_t mmx_scan_reorder (uint8_t idx)
{
    return (uint8_t) ((idx & 0x38) | ((idx & 6) >> 1) | ((idx & 1) << 2));
}

/*
 * Patch the external scan tables for the MMX / MMXEXT IDCT.
 *
 * Every one of the 64 entries of scan_norm and scan_alt is rewritten in
 * place with mmx_scan_reorder.  The remapping is not idempotent, so calling
 * this function a second time permutes the tables again.
 */
void idct_mmx_init (void)
{
    extern uint8_t scan_norm[64];
    extern uint8_t scan_alt[64];
    int i;

    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */

    for (i = 0; i < 64; i++) {
        scan_norm[i] = mmx_scan_reorder (scan_norm[i]);
        scan_alt[i] = mmx_scan_reorder (scan_alt[i]);
    }
}
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	704
3b5f5d1c5041 Initial revision arpi_esp parents: diff changeset	705 #endif

Mercurial > mplayer.hg

annotate libmpeg2/idct_mmx.c @ 8219:8f6548a70cda