annotate dctref.c @ 11032:01bd040f8607 libavcodec

Unroll main loop so the edge==0 case is separate. This allows many things to be simplified away. The h264 decoder is overall 1% faster with a mbaff sample and 0.1% slower with the cathedral sample, probably because the slow loop filter code must be loaded into the code cache for the first MB of each row but isn't used for the following MBs.
author michael
date Thu, 28 Jan 2010 01:24:25 +0000
parents aad816bc3d54
children 4b6b3ffbaee3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9308
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
1 /*
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
2 * reference discrete cosine transform (double precision)
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
3 * Copyright (C) 2009 Dylan Yudaken
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
4 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
5 * This file is part of FFmpeg.
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
6 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
11 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
15 * Lesser General Public License for more details.
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
16 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
20 */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
21
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
22 /**
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
23 * @file libavcodec/dctref.c
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
24 * reference discrete cosine transform (double precision)
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
25 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
26 * @author Dylan Yudaken (dyudaken at gmail)
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
27 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
28 * @note This file could be optimized a lot, but is for
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
29 * reference and so readability is better.
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
30 */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
31
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
32 #include "libavutil/mathematics.h"
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
/*
 * 8x8 DCT basis table, stored row-major (index = row * 8 + column).
 * It is zero until ff_ref_dct_init() fills it; ff_ref_fdct() and
 * ff_ref_idct() only read from it.
 */
static double coefficients[8 * 8];
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
34
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
35 /**
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
36 * Initialize the double precision discrete cosine transform
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
37 * functions fdct & idct.
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
38 */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
39 av_cold void ff_ref_dct_init(void)
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
40 {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
41 unsigned int i, j;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
42
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
43 for (j = 0; j < 8; ++j) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
44 coefficients[j] = sqrt(0.125);
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
45 for (i = 8; i < 64; i += 8) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
46 coefficients[i + j] = 0.5 * cos(i * (j + 0.5) * M_PI / 64.0);
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
47 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
48 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
49 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
50
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
51 /**
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
52 * Transform 8x8 block of data with a double precision forward DCT <br>
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
53 * This is a reference implementation.
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
54 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
55 * @param block pointer to 8x8 block of data to transform
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
56 */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
57 void ff_ref_fdct(short *block)
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
58 {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
59 /* implement the equation: block = coefficients * block * coefficients' */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
60
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
61 unsigned int i, j, k;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
62 double out[8 * 8];
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
63
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
64 /* out = coefficients * block */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
65 for (i = 0; i < 64; i += 8) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
66 for (j = 0; j < 8; ++j) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
67 double tmp = 0;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
68 for (k = 0; k < 8; ++k) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
69 tmp += coefficients[i + k] * block[k * 8 + j];
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
70 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
71 out[i + j] = tmp * 8;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
72 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
73 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
74
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
75 /* block = out * (coefficients') */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
76 for (j = 0; j < 8; ++j) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
77 for (i = 0; i < 64; i += 8) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
78 double tmp = 0;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
79 for (k = 0; k < 8; ++k) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
80 tmp += out[i + k] * coefficients[j * 8 + k];
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
81 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
82 block[i + j] = floor(tmp + 0.499999999999);
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
83 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
84 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
85 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
86
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
87 /**
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
88 * Transform 8x8 block of data with a double precision inverse DCT <br>
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
89 * This is a reference implementation.
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
90 *
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
91 * @param block pointer to 8x8 block of data to transform
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
92 */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
93 void ff_ref_idct(short *block)
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
94 {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
95 /* implement the equation: block = (coefficients') * block * coefficients */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
96
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
97 unsigned int i, j, k;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
98 double out[8 * 8];
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
99
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
100 /* out = block * coefficients */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
101 for (i = 0; i < 64; i += 8) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
102 for (j = 0; j < 8; ++j) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
103 double tmp = 0;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
104 for (k = 0; k < 8; ++k) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
105 tmp += block[i + k] * coefficients[k * 8 + j];
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
106 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
107 out[i + j] = tmp;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
108 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
109 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
110
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
111 /* block = (coefficients') * out */
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
112 for (i = 0; i < 8; ++i) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
113 for (j = 0; j < 8; ++j) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
114 double tmp = 0;
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
115 for (k = 0; k < 64; k += 8) {
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
116 tmp += coefficients[k + i] * out[k + j];
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
117 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
118 block[i * 8 + j] = floor(tmp + 0.5);
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
119 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
120 }
aad816bc3d54 Replacement reference DCT implementation.
diego
parents:
diff changeset
121 }