annotate eaidct.c @ 12197:fbf4d5b1b664 libavcodec

Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than regular MMX code. One example of this is the Core1 CPU. Instead, set a new flag, FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that have been checked specifically on such CPUs and are actually faster than their MMX counterparts. In addition, use this flag to enable particular VP8 and LPC SSE2 functions that are faster than their MMX counterparts. Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author rbultje
date Mon, 19 Jul 2010 22:38:23 +0000
parents 7dd2a45249a9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8120
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
1 /*
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
2 * Electronic Arts TGQ/TQI/MAD IDCT algorithm
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
3 * Copyright (c) 2007-2008 Peter Ross <pross@xvid.org>
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
4 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
5 * This file is part of FFmpeg.
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
6 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
11 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
15 * Lesser General Public License for more details.
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
16 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
20 */
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
21
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
22 /**
11644
7dd2a45249a9 Remove explicit filename from Doxygen @file commands.
diego
parents: 8718
diff changeset
23 * @file
8120
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
24 * Electronic Arts TGQ/TQI/MAD IDCT algorithm
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
25 * @author Peter Ross <pross@xvid.org>
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
26 */
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
27
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
28 #include "dsputil.h"
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
29
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
/* Fixed-point multipliers used by the butterflies below. */
#define ASQRT 181 /* (1/sqrt(2))<<8 */
#define A4    669 /* cos(pi/8)*sqrt(2)<<9 */
#define A2    277 /* sin(pi/8)*sqrt(2)<<9 */
#define A5    196 /* sin(pi/8)<<9 */

/**
 * One-dimensional 8-point inverse transform.
 *
 * Reads the eight inputs (src)[s0] .. (src)[s7], combines them with the
 * fixed-point multipliers above and stores munge(result) into
 * (dest)[d0] .. (dest)[d7].
 *
 * @param dest   output array (indexed with d0..d7)
 * @param s0..s7 indices of the eight inputs inside src
 * @param d0..d7 indices of the eight outputs inside dest
 * @param munge  function-like macro or function applied to every output value
 * @param src    input array (indexed with s0..s7)
 *
 * Every input is read into a local before the first output is written.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by ';' is exactly one statement and can be used safely as the unbraced
 * body of an if/else or a loop.
 */
#define IDCT_TRANSFORM(dest,s0,s1,s2,s3,s4,s5,s6,s7,d0,d1,d2,d3,d4,d5,d6,d7,munge,src) do {\
    /* sums/differences of the odd-indexed inputs */ \
    const int a1 = (src)[s1] + (src)[s7]; \
    const int a7 = (src)[s1] - (src)[s7]; \
    const int a5 = (src)[s5] + (src)[s3]; \
    const int a3 = (src)[s5] - (src)[s3]; \
    /* even-indexed inputs */ \
    const int a2 = (src)[s2] + (src)[s6]; \
    const int a6 = (ASQRT*((src)[s2] - (src)[s6]))>>8; \
    const int a0 = (src)[s0] + (src)[s4]; \
    const int a4 = (src)[s0] - (src)[s4]; \
    /* terms shared between the b0..b3 outputs of the odd part */ \
    const int odd_rot  = ((A4-A5)*a7 - A5*a3)>>9; \
    const int odd_diag = (ASQRT*(a1-a5))>>8; \
    const int b3 = ((A2+A5)*a3 + A5*a7)>>9; \
    const int b0 = odd_rot + a1+a5; \
    const int b1 = odd_rot + odd_diag; \
    const int b2 = b3 + odd_diag; \
    (dest)[d0] = munge(a0+a2+a6+b0); \
    (dest)[d1] = munge(a4+a6   +b1); \
    (dest)[d2] = munge(a4-a6   +b2); \
    (dest)[d3] = munge(a0-a2-a6+b3); \
    (dest)[d4] = munge(a0-a2-a6-b3); \
    (dest)[d5] = munge(a4-a6   -b2); \
    (dest)[d6] = munge(a4+a6   -b1); \
    (dest)[d7] = munge(a0+a2+a6-b0); \
} while (0)
/* end IDCT_TRANSFORM macro */

/* Column pass: elements are 8 apart, results are stored unmodified. */
#define MUNGE_NONE(x) (x)
#define IDCT_COL(dest,src) IDCT_TRANSFORM(dest,0,8,16,24,32,40,48,56,0,8,16,24,32,40,48,56,MUNGE_NONE,src)

/* Row pass: elements are adjacent, results are shifted right by 4 and
 * clipped with av_clip_uint8() before being stored. */
#define MUNGE_8BIT(x) av_clip_uint8((x)>>4)
#define IDCT_ROW(dest,src) IDCT_TRANSFORM(dest,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,MUNGE_8BIT,src)
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
64
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
65 static inline void ea_idct_col(DCTELEM *dest, const DCTELEM *src) {
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
66 if ((src[8]|src[16]|src[24]|src[32]|src[40]|src[48]|src[56])==0) {
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
67 dest[0] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
68 dest[8] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
69 dest[16] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
70 dest[24] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
71 dest[32] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
72 dest[40] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
73 dest[48] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
74 dest[56] = src[0];
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
75 }else
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
76 IDCT_COL(dest, src);
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
77 }
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
78
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
79 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block) {
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
80 int i;
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
81 DCTELEM temp[64];
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
82 block[0] += 4;
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
83 for (i=0; i<8; i++)
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
84 ea_idct_col(&temp[i], &block[i]);
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
85 for (i=0; i<8; i++)
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
86 IDCT_ROW( (&dest[i*linesize]), (&temp[8*i]) );
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
87 }