Mercurial > libavcodec.hg
annotate binkidct.c @ 12279:7fb91885433c libavcodec
Use pmaddubsw for the mbedge_filter (>=ssse3), 6-10 cycles faster.
author | rbultje |
---|---|
date | Mon, 26 Jul 2010 21:18:19 +0000 |
parents | 7dd2a45249a9 |
children |
rev | line source |
---|---|
11231 | 1 /* |
2 * Bink IDCT algorithm | |
3 * Copyright (c) 2009 Kostya Shishkov | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11231
diff
changeset
|
23 * @file |
11231 | 24 * Bink IDCT algorithm |
25 */ | |
26 | |
27 #include "dsputil.h" | |
28 | |
/*
 * Fixed-point multipliers used by IDCT_TRANSFORM.
 * Every product formed with them is shifted right by 11 bits.
 */
#define A1   2896  /* (1/sqrt(2))<<12 */
#define A2   2217
#define A3   3784
#define A4 (-5352) /* parenthesized so the sign survives any expansion context */

/**
 * One 8-point 1-D inverse transform pass.
 *
 * Reads eight inputs (src)[s0..s7], computes eight results in int
 * arithmetic and stores munge(result) to (dest)[d0..d7].
 *
 * @param dest   output array expression
 * @param s0..s7 indices of the eight inputs within src
 * @param d0..d7 indices of the eight outputs within dest
 * @param munge  function-like macro applied to each result before the store
 * @param src    input array expression
 *
 * src elements are evaluated more than once, so neither dest nor src should
 * have side effects. The body is wrapped in do { } while (0) so that an
 * invocation followed by ';' is exactly one statement and is safe in an
 * unbraced if/else.
 */
#define IDCT_TRANSFORM(dest,s0,s1,s2,s3,s4,s5,s6,s7,d0,d1,d2,d3,d4,d5,d6,d7,munge,src) do {\
    /* even-indexed inputs */ \
    const int a0 = (src)[s0] + (src)[s4]; \
    const int a1 = (src)[s0] - (src)[s4]; \
    const int a2 = (src)[s2] + (src)[s6]; \
    const int a3 = (A1*((src)[s2] - (src)[s6])) >> 11; \
    /* odd-indexed inputs */ \
    const int a4 = (src)[s5] + (src)[s3]; \
    const int a5 = (src)[s5] - (src)[s3]; \
    const int a6 = (src)[s1] + (src)[s7]; \
    const int a7 = (src)[s1] - (src)[s7]; \
    const int b0 = a4 + a6; \
    const int b1 = (A3*(a5 + a7)) >> 11; \
    const int b2 = ((A4*a5) >> 11) - b0 + b1; \
    const int b3 = (A1*(a6 - a4) >> 11) - b2; \
    const int b4 = ((A2*a7) >> 11) + b3 - b1; \
    /* outputs d0..d3 and d7..d4 mirror each other in the sign of the b terms */ \
    (dest)[d0] = munge(a0+a2   +b0); \
    (dest)[d1] = munge(a1+a3-a2+b2); \
    (dest)[d2] = munge(a1-a3+a2+b3); \
    (dest)[d3] = munge(a0-a2   -b4); \
    (dest)[d4] = munge(a0-a2   +b4); \
    (dest)[d5] = munge(a1-a3+a2-b3); \
    (dest)[d6] = munge(a1+a3-a2-b2); \
    (dest)[d7] = munge(a0+a2   -b0); \
} while (0)
/* end IDCT_TRANSFORM macro */

/* Column pass: elements are 8 apart, results are stored unscaled. */
#define MUNGE_NONE(x) (x)
#define IDCT_COL(dest,src) IDCT_TRANSFORM(dest,0,8,16,24,32,40,48,56,0,8,16,24,32,40,48,56,MUNGE_NONE,src)

/* Row pass: elements are adjacent, each result gets +0x7F and is shifted right by 8. */
#define MUNGE_ROW(x) (((x) + 0x7F)>>8)
#define IDCT_ROW(dest,src) IDCT_TRANSFORM(dest,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,MUNGE_ROW,src)
64 | |
65 static inline void bink_idct_col(DCTELEM *dest, const DCTELEM *src) | |
66 { | |
67 if ((src[8]|src[16]|src[24]|src[32]|src[40]|src[48]|src[56])==0) { | |
68 dest[0] = | |
69 dest[8] = | |
70 dest[16] = | |
71 dest[24] = | |
72 dest[32] = | |
73 dest[40] = | |
74 dest[48] = | |
75 dest[56] = src[0]; | |
76 } else { | |
77 IDCT_COL(dest, src); | |
78 } | |
79 } | |
80 | |
81 void ff_bink_idct_c(DCTELEM *block) | |
82 { | |
83 int i; | |
84 DCTELEM temp[64]; | |
85 | |
86 for (i = 0; i < 8; i++) | |
87 bink_idct_col(&temp[i], &block[i]); | |
88 for (i = 0; i < 8; i++) { | |
89 IDCT_ROW( (&block[8*i]), (&temp[8*i]) ); | |
90 } | |
91 } | |
92 | |
93 void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block) | |
94 { | |
95 int i, j; | |
96 | |
97 ff_bink_idct_c(block); | |
98 for (i = 0; i < 8; i++, dest += linesize, block += 8) | |
99 for (j = 0; j < 8; j++) | |
100 dest[j] += block[j]; | |
101 } | |
102 | |
103 void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block) | |
104 { | |
105 int i; | |
106 DCTELEM temp[64]; | |
107 for (i = 0; i < 8; i++) | |
108 bink_idct_col(&temp[i], &block[i]); | |
109 for (i = 0; i < 8; i++) { | |
110 IDCT_ROW( (&dest[i*linesize]), (&temp[8*i]) ); | |
111 } | |
112 } |