annotate eaidct.c @ 8376:e1b242224970 libavcodec

Use the new idct functions (except for chroma, as it was slower in benchmarks). cathedral: +0.5% speed; aladin: +0.6% speed [note: aladin has been cat-ed 10 times to reduce the influence of init time]. Speedup also verified via START/STOP_TIMER (the difference was very significant for the changed parts).
author michael
date Thu, 18 Dec 2008 02:53:18 +0000
parents 3d4e01bcd2a5
children e9d9d946f213
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8120
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
1 /*
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
2 * Electronic Arts TGQ/TQI/MAD IDCT algorithm
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
3 * Copyright (c) 2007-2008 Peter Ross <pross@xvid.org>
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
4 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
5 * This file is part of FFmpeg.
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
6 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
11 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
15 * Lesser General Public License for more details.
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
16 *
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
20 */
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
21
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
22 /**
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
23 * @file eaidct.c
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
24 * Electronic Arts TGQ/TQI/MAD IDCT algorithm
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
25 * @author Peter Ross <pross@xvid.org>
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
26 */
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
27
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
28 #include "dsputil.h"
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
29
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
/* Fixed-point multipliers used by IDCT_TRANSFORM. */
#define ASQRT 181 /* (1/sqrt(2))<<8 */
#define A4    669 /* cos(pi/8)*sqrt(2)<<9 */
#define A2    277 /* sin(pi/8)*sqrt(2)<<9 */
#define A5    196 /* sin(pi/8)<<9 */

/**
 * One 8-point, one-dimensional inverse transform in integer arithmetic.
 *
 * Reads eight inputs (src)[s0] .. (src)[s7], and stores eight results to
 * (dest)[d0] .. (dest)[d7], passing each result through munge() first.
 * All eight inputs are read into locals before any output is stored.
 *
 * The expansion is wrapped in do { ... } while (0) so that an invocation
 * followed by ';' is exactly one statement (safe as an unbraced if/else
 * or loop body).
 *
 * @param dest   output array expression
 * @param s0..s7 indices into src of the eight inputs
 * @param d0..d7 indices into dest of the eight outputs
 * @param munge  function-like macro applied to each result before storing
 * @param src    input array expression
 *
 * @note dest and src are expanded several times; pass expressions without
 *       side effects. The expansion declares locals named a0..a7 and
 *       b0..b3, so the arguments must not refer to variables of those names.
 * @note Intermediate values may be negative and are right-shifted; this
 *       relies on the compiler implementing >> on negative ints as an
 *       arithmetic shift.
 */
#define IDCT_TRANSFORM(dest,s0,s1,s2,s3,s4,s5,s6,s7,d0,d1,d2,d3,d4,d5,d6,d7,munge,src) do {\
    const int a1 = (src)[s1] + (src)[s7]; \
    const int a7 = (src)[s1] - (src)[s7]; \
    const int a5 = (src)[s5] + (src)[s3]; \
    const int a3 = (src)[s5] - (src)[s3]; \
    const int a2 = (src)[s2] + (src)[s6]; \
    const int a6 = (ASQRT*((src)[s2] - (src)[s6]))>>8; \
    const int a0 = (src)[s0] + (src)[s4]; \
    const int a4 = (src)[s0] - (src)[s4]; \
    const int b0 = (((A4-A5)*a7 - A5*a3)>>9) + a1+a5; \
    const int b1 = (((A4-A5)*a7 - A5*a3)>>9) + ((ASQRT*(a1-a5))>>8); \
    const int b2 = (((A2+A5)*a3 + A5*a7)>>9) + ((ASQRT*(a1-a5))>>8); \
    const int b3 =  ((A2+A5)*a3 + A5*a7)>>9; \
    (dest)[d0] = munge(a0+a2+a6+b0); \
    (dest)[d1] = munge(a4+a6   +b1); \
    (dest)[d2] = munge(a4-a6   +b2); \
    (dest)[d3] = munge(a0-a2-a6+b3); \
    (dest)[d4] = munge(a0-a2-a6-b3); \
    (dest)[d5] = munge(a4-a6   -b2); \
    (dest)[d6] = munge(a4+a6   -b1); \
    (dest)[d7] = munge(a0+a2+a6-b0); \
} while (0)
/* end IDCT_TRANSFORM macro */

/* Column pass: elements are 8 apart, results are stored unmodified. */
#define MUNGE_NONE(x) (x)
#define IDCT_COL(dest,src) IDCT_TRANSFORM(dest,0,8,16,24,32,40,48,56,0,8,16,24,32,40,48,56,MUNGE_NONE,src)

/* Row pass: elements are adjacent, results are shifted right by 4 and
 * passed through av_clip_uint8() before being stored. */
#define MUNGE_8BIT(x) av_clip_uint8((x)>>4)
#define IDCT_ROW(dest,src) IDCT_TRANSFORM(dest,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,MUNGE_8BIT,src)
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
64
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
65 static inline void ea_idct_col(DCTELEM *dest, const DCTELEM *src) {
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
66 if ((src[8]|src[16]|src[24]|src[32]|src[40]|src[48]|src[56])==0) {
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
67 dest[0] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
68 dest[8] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
69 dest[16] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
70 dest[24] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
71 dest[32] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
72 dest[40] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
73 dest[48] =
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
74 dest[56] = src[0];
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
75 }else
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
76 IDCT_COL(dest, src);
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
77 }
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
78
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
79 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block) {
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
80 int i;
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
81 DCTELEM temp[64];
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
82 block[0] += 4;
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
83 for (i=0; i<8; i++)
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
84 ea_idct_col(&temp[i], &block[i]);
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
85 for (i=0; i<8; i++)
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
86 IDCT_ROW( (&dest[i*linesize]), (&temp[8*i]) );
3d4e01bcd2a5 Electronic Arts TGQ/TQI/MAD IDCT algorithm
pross
parents:
diff changeset
87 }