Mercurial > libavcodec.hg
annotate snow.c @ 9432:c9fda17dc042 libavcodec
Skip loop iterations which are no-ops due to data1[-lag..-1] being zero.
author | koorogi |
---|---|
date | Mon, 13 Apr 2009 18:07:02 +0000 |
parents | 2313bf51945b |
children | 5c58923d47d2 |
rev | line source |
---|---|
2138 | 1 /* |
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2138 | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2138 | 10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2138 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3035
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2138 | 19 */ |
20 | |
21 #include "avcodec.h" | |
22 #include "dsputil.h" | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
23 #include "snow.h" |
2335 | 24 |
25 #include "rangecoder.h" | |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8611
diff
changeset
|
26 #include "mathops.h" |
2138 | 27 |
28 #include "mpegvideo.h" | |
29 | |
30 #undef NDEBUG | |
31 #include <assert.h> | |
32 | |
33 static const int8_t quant3[256]={ | |
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
50 }; | |
51 static const int8_t quant3b[256]={ | |
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
68 }; | |
2596 | 69 static const int8_t quant3bA[256]={ |
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
86 }; | |
2138 | 87 static const int8_t quant5[256]={ |
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
104 }; | |
105 static const int8_t quant7[256]={ | |
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
122 }; | |
123 static const int8_t quant9[256]={ | |
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
140 }; | |
141 static const int8_t quant11[256]={ | |
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
158 }; | |
159 static const int8_t quant13[256]={ | |
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
176 }; | |
177 | |
178 #if 0 //64*cubic | |
179 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 212 //error:0.000022 |
213 }; | |
214 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
2138 | 231 //error:0.000033 |
232 }; | |
233 #elif 1 // 64*linear | |
234 static const uint8_t obmc32[1024]={ | |
3206 | 235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, |
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
2138 | 267 //error:0.000020 |
268 }; | |
269 static const uint8_t obmc16[256]={ | |
3206 | 270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, |
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, | |
2138 | 286 //error:0.000015 |
287 }; | |
288 #else //64*cos | |
289 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 322 //error:0.000022 |
323 }; | |
324 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
2138 | 341 //error:0.000022 |
342 }; | |
5910 | 343 #endif /* 0 */ |
2138 | 344 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
345 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
346 static const uint8_t obmc8[64]={ |
3206 | 347 4, 12, 20, 28, 28, 20, 12, 4, |
348 12, 36, 60, 84, 84, 60, 36, 12, | |
349 20, 60,100,140,140,100, 60, 20, | |
350 28, 84,140,196,196,140, 84, 28, | |
351 28, 84,140,196,196,140, 84, 28, | |
352 20, 60,100,140,140,100, 60, 20, | |
353 12, 36, 60, 84, 84, 60, 36, 12, | |
354 4, 12, 20, 28, 28, 20, 12, 4, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
355 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
356 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
357 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
358 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
359 static const uint8_t obmc4[16]={ |
3206 | 360 16, 48, 48, 16, |
361 48,144,144, 48, | |
362 48,144,144, 48, | |
363 16, 48, 48, 16, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
364 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
365 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
366 |
7129 | 367 static const uint8_t * const obmc_tab[4]={ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
368 obmc32, obmc16, obmc8, obmc4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
369 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
370 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
372 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
373 typedef struct BlockNode{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
374 int16_t mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
375 int16_t my; |
3314 | 376 uint8_t ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
377 uint8_t color[3]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
378 uint8_t type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
379 //#define TYPE_SPLIT 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
380 #define BLOCK_INTRA 1 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
381 #define BLOCK_OPT 2 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
382 //#define TYPE_NOCOLOR 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
383 uint8_t level; //FIXME merge into type? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
384 }BlockNode; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
385 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
386 static const BlockNode null_block= { //FIXME add border maybe |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
387 .color= {128,128,128}, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
388 .mx= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
389 .my= 0, |
3314 | 390 .ref= 0, |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
391 .type= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
392 .level= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
393 }; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
394 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
395 #define LOG2_MB_SIZE 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
396 #define MB_SIZE (1<<LOG2_MB_SIZE) |
5575 | 397 #define ENCODER_EXTRA_BITS 4 |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
398 #define HTAPS_MAX 8 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
399 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
400 typedef struct x_and_coeff{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
401 int16_t x; |
2596 | 402 uint16_t coeff; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
403 } x_and_coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
404 |
2138 | 405 typedef struct SubBand{ |
406 int level; | |
407 int stride; | |
408 int width; | |
409 int height; | |
6412 | 410 int qlog; ///< log(qscale)/log[2^(1/6)] |
2138 | 411 DWTELEM *buf; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
412 IDWTELEM *ibuf; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
413 int buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
414 int buf_y_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
415 int stride_line; ///< Stride measured in lines, not pixels. |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
416 x_and_coeff * x_coeff; |
2138 | 417 struct SubBand *parent; |
418 uint8_t state[/*7*2*/ 7 + 512][32]; | |
419 }SubBand; | |
420 | |
421 typedef struct Plane{ | |
422 int width; | |
423 int height; | |
424 SubBand band[MAX_DECOMPOSITIONS][4]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
425 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
426 int htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
427 int8_t hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
428 int diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
429 int fast_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
430 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
431 int last_htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
432 int8_t last_hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
433 int last_diag_mc; |
2138 | 434 }Plane; |
435 | |
436 typedef struct SnowContext{ | |
6412 | 437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 438 |
439 AVCodecContext *avctx; | |
2335 | 440 RangeCoder c; |
2138 | 441 DSPContext dsp; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
442 AVFrame new_picture; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
443 AVFrame input_picture; ///< new_picture with the internal linesizes |
2138 | 444 AVFrame current_picture; |
3314 | 445 AVFrame last_picture[MAX_REF_FRAMES]; |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; |
2138 | 447 AVFrame mconly_picture; |
448 // uint8_t q_context[16]; | |
449 uint8_t header_state[32]; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
450 uint8_t block_state[128 + 32*128]; |
2138 | 451 int keyframe; |
2199 | 452 int always_reset; |
2138 | 453 int version; |
454 int spatial_decomposition_type; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
455 int last_spatial_decomposition_type; |
2138 | 456 int temporal_decomposition_type; |
457 int spatial_decomposition_count; | |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
458 int last_spatial_decomposition_count; |
2138 | 459 int temporal_decomposition_count; |
3314 | 460 int max_ref_frames; |
461 int ref_frames; | |
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | |
463 uint32_t *ref_scores[MAX_REF_FRAMES]; | |
2138 | 464 DWTELEM *spatial_dwt_buffer; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
465 IDWTELEM *spatial_idwt_buffer; |
2138 | 466 int colorspace_type; |
467 int chroma_h_shift; | |
468 int chroma_v_shift; | |
469 int spatial_scalability; | |
470 int qlog; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
471 int last_qlog; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
472 int lambda; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
473 int lambda2; |
3313 | 474 int pass1_rc; |
2138 | 475 int mv_scale; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
476 int last_mv_scale; |
2138 | 477 int qbias; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
478 int last_qbias; |
2138 | 479 #define QBIAS_SHIFT 3 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
480 int b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
481 int b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
482 int block_max_depth; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
483 int last_block_max_depth; |
2138 | 484 Plane plane[MAX_PLANES]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
485 BlockNode *block; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
486 #define ME_CACHE_SIZE 1024 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
487 int me_cache[ME_CACHE_SIZE]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
488 int me_cache_generation; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
489 slice_buffer sb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
490 |
6412 | 491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
492 |
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
493 uint8_t *scratchbuf; |
2138 | 494 }SnowContext; |
495 | |
2562 | 496 typedef struct { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
497 IDWTELEM *b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
498 IDWTELEM *b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
499 IDWTELEM *b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
500 IDWTELEM *b3; |
2562 | 501 int y; |
8308 | 502 } DWTCompose; |
2562 | 503 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
506 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
507 static void iterative_me(SnowContext *s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
508 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
510 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
511 int i; |
2967 | 512 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
513 buf->base_buffer = base_buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
514 buf->line_count = line_count; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
515 buf->line_width = line_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
516 buf->data_count = max_allocated_lines; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
2967 | 519 |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
520 for(i = 0; i < max_allocated_lines; i++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
522 } |
2967 | 523 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
524 buf->data_stack_top = max_allocated_lines - 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
525 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
526 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
528 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
529 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
530 IDWTELEM * buffer; |
2967 | 531 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
532 assert(buf->data_stack_top >= 0); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
533 // assert(!buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
534 if (buf->line[line]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
535 return buf->line[line]; |
2967 | 536 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
537 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
538 buffer = buf->data_stack[buf->data_stack_top]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
539 buf->data_stack_top--; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
540 buf->line[line] = buffer; |
2967 | 541 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
542 return buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
543 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
544 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
545 static void slice_buffer_release(slice_buffer * buf, int line) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
546 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
547 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
548 IDWTELEM * buffer; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
549 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
550 assert(line >= 0 && line < buf->line_count); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
551 assert(buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
552 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
553 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
554 buffer = buf->line[line]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
555 buf->data_stack_top++; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
556 buf->data_stack[buf->data_stack_top] = buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
557 buf->line[line] = NULL; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
558 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
559 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
560 static void slice_buffer_flush(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
561 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
562 int i; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
563 for(i = 0; i < buf->line_count; i++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
564 if (buf->line[i]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
565 slice_buffer_release(buf, i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
566 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
567 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
568 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
569 static void slice_buffer_destroy(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
570 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
571 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
572 slice_buffer_flush(buf); |
2967 | 573 |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
574 for(i = buf->data_count - 1; i >= 0; i--){ |
3190 | 575 av_freep(&buf->data_stack[i]); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
576 } |
3190 | 577 av_freep(&buf->data_stack); |
578 av_freep(&buf->line); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
579 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
580 |
2979 | 581 #ifdef __sgi |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
582 // Avoid a name clash on SGI IRIX |
2979 | 583 #undef qexp |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
584 #endif |
2246 | 585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
2600 | 586 static uint8_t qexp[QROOT]; |
2138 | 587 |
588 static inline int mirror(int v, int m){ | |
2998 | 589 while((unsigned)v > (unsigned)m){ |
590 v=-v; | |
591 if(v<0) v+= 2*m; | |
592 } | |
593 return v; | |
2138 | 594 } |
595 | |
2335 | 596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
2138 | 597 int i; |
598 | |
599 if(v){ | |
4001 | 600 const int a= FFABS(v); |
2138 | 601 const int e= av_log2(a); |
602 #if 1 | |
2967 | 603 const int el= FFMIN(e, 10); |
2335 | 604 put_rac(c, state+0, 0); |
2138 | 605 |
606 for(i=0; i<el; i++){ | |
2335 | 607 put_rac(c, state+1+i, 1); //1..10 |
2138 | 608 } |
609 for(; i<e; i++){ | |
2335 | 610 put_rac(c, state+1+9, 1); //1..10 |
2138 | 611 } |
2335 | 612 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 613 |
614 for(i=e-1; i>=el; i--){ | |
2335 | 615 put_rac(c, state+22+9, (a>>i)&1); //22..31 |
2138 | 616 } |
617 for(; i>=0; i--){ | |
2335 | 618 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 619 } |
620 | |
621 if(is_signed) | |
2335 | 622 put_rac(c, state+11 + el, v < 0); //11..21 |
2138 | 623 #else |
2967 | 624 |
2335 | 625 put_rac(c, state+0, 0); |
2138 | 626 if(e<=9){ |
627 for(i=0; i<e; i++){ | |
2335 | 628 put_rac(c, state+1+i, 1); //1..10 |
2138 | 629 } |
2335 | 630 put_rac(c, state+1+i, 0); |
2138 | 631 |
632 for(i=e-1; i>=0; i--){ | |
2335 | 633 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 634 } |
635 | |
636 if(is_signed) | |
2335 | 637 put_rac(c, state+11 + e, v < 0); //11..21 |
2138 | 638 }else{ |
639 for(i=0; i<e; i++){ | |
2335 | 640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
2138 | 641 } |
2335 | 642 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 643 |
644 for(i=e-1; i>=0; i--){ | |
2335 | 645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
2138 | 646 } |
647 | |
648 if(is_signed) | |
2335 | 649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
2138 | 650 } |
5910 | 651 #endif /* 1 */ |
2138 | 652 }else{ |
2335 | 653 put_rac(c, state+0, 1); |
2138 | 654 } |
655 } | |
656 | |
2335 | 657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
658 if(get_rac(c, state+0)) | |
2138 | 659 return 0; |
660 else{ | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
661 int i, e, a; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
662 e= 0; |
2335 | 663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
664 e++; |
2138 | 665 } |
666 | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
667 a= 1; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
668 for(i=e-1; i>=0; i--){ |
2335 | 669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
670 } |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
671 |
2335 | 672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
2138 | 673 return -a; |
674 else | |
675 return a; | |
676 } | |
677 } | |
678 | |
2335 | 679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
680 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
681 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
682 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
683 assert(v>=0); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
684 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
685 |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
686 while(v >= r){ |
2335 | 687 put_rac(c, state+4+log2, 1); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
688 v -= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
689 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
690 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
691 } |
2335 | 692 put_rac(c, state+4+log2, 0); |
2967 | 693 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
694 for(i=log2-1; i>=0; i--){ |
2335 | 695 put_rac(c, state+31-i, (v>>i)&1); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
696 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
697 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
698 |
2335 | 699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
700 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
701 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
702 int v=0; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
703 |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
704 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
705 |
2335 | 706 while(get_rac(c, state+4+log2)){ |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
707 v+= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
708 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
709 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
710 } |
2967 | 711 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
712 for(i=log2-1; i>=0; i--){ |
2335 | 713 v+= get_rac(c, state+31-i)<<i; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
714 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
715 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
716 return v; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
717 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
718 |
5627 | 719 static av_always_inline void |
720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
721 int dst_step, int src_step, int ref_step, | |
722 int width, int mul, int add, int shift, | |
723 int highpass, int inverse){ | |
2138 | 724 const int mirror_left= !highpass; |
725 const int mirror_right= (width&1) ^ highpass; | |
726 const int w= (width>>1) - 1 + (highpass & width); | |
727 int i; | |
728 | |
729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
730 if(mirror_left){ | |
731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
732 dst += dst_step; | |
733 src += src_step; | |
734 } | |
2967 | 735 |
2138 | 736 for(i=0; i<w; i++){ |
5627 | 737 dst[i*dst_step] = |
738 LIFT(src[i*src_step], | |
739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
740 inverse); | |
2138 | 741 } |
2967 | 742 |
2138 | 743 if(mirror_right){ |
5627 | 744 dst[w*dst_step] = |
745 LIFT(src[w*src_step], | |
746 ((mul*2*ref[w*ref_step]+add)>>shift), | |
747 inverse); | |
2138 | 748 } |
749 } | |
750 | |
5627 | 751 static av_always_inline void |
752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
753 int dst_step, int src_step, int ref_step, | |
754 int width, int mul, int add, int shift, | |
755 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
756 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
757 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
758 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
759 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
760 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
762 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
764 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
765 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
766 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
767 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
768 for(i=0; i<w; i++){ |
5627 | 769 dst[i*dst_step] = |
770 LIFT(src[i*src_step], | |
771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
772 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
773 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
774 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
775 if(mirror_right){ |
5627 | 776 dst[w*dst_step] = |
777 LIFT(src[w*src_step], | |
778 ((mul*2*ref[w*ref_step]+add)>>shift), | |
779 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
780 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
781 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
782 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
783 #ifndef liftS |
5627 | 784 static av_always_inline void |
785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
786 int dst_step, int src_step, int ref_step, | |
787 int width, int mul, int add, int shift, | |
788 int highpass, int inverse){ | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
789 const int mirror_left= !highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
790 const int mirror_right= (width&1) ^ highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
791 const int w= (width>>1) - 1 + (highpass & width); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
792 int i; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
793 |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
794 assert(shift == 4); |
5627 | 795 #define LIFTS(src, ref, inv) \ |
796 ((inv) ? \ | |
797 (src) + (((ref) + 4*(src))>>shift): \ | |
798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
799 if(mirror_left){ |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
801 dst += dst_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
802 src += src_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
803 } |
2967 | 804 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
805 for(i=0; i<w; i++){ |
5627 | 806 dst[i*dst_step] = |
807 LIFTS(src[i*src_step], | |
808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
809 inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
810 } |
2967 | 811 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
812 if(mirror_right){ |
5627 | 813 dst[w*dst_step] = |
814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
815 } |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
816 } |
5627 | 817 static av_always_inline void |
818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
819 int dst_step, int src_step, int ref_step, | |
820 int width, int mul, int add, int shift, | |
821 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
822 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
823 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
824 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
825 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
826 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
827 assert(shift == 4); |
5627 | 828 #define LIFTS(src, ref, inv) \ |
829 ((inv) ? \ | |
830 (src) + (((ref) + 4*(src))>>shift): \ | |
831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
832 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
834 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
835 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
836 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
837 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
838 for(i=0; i<w; i++){ |
5627 | 839 dst[i*dst_step] = |
840 LIFTS(src[i*src_step], | |
841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
842 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
843 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
844 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
845 if(mirror_right){ |
5627 | 846 dst[w*dst_step] = |
847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
848 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
849 } |
6413 | 850 #endif /* ! liftS */ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
851 |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
852 static void horizontal_decompose53i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
853 DWTELEM temp[width]; |
2138 | 854 const int width2= width>>1; |
2893 | 855 int x; |
2138 | 856 const int w2= (width+1)>>1; |
857 | |
858 for(x=0; x<width2; x++){ | |
859 temp[x ]= b[2*x ]; | |
860 temp[x+w2]= b[2*x + 1]; | |
861 } | |
862 if(width&1) | |
863 temp[x ]= b[2*x ]; | |
864 #if 0 | |
2893 | 865 { |
866 int A1,A2,A3,A4; | |
2138 | 867 A2= temp[1 ]; |
868 A4= temp[0 ]; | |
869 A1= temp[0+width2]; | |
870 A1 -= (A2 + A4)>>1; | |
871 A4 += (A1 + 1)>>1; | |
872 b[0+width2] = A1; | |
873 b[0 ] = A4; | |
874 for(x=1; x+1<width2; x+=2){ | |
875 A3= temp[x+width2]; | |
876 A4= temp[x+1 ]; | |
877 A3 -= (A2 + A4)>>1; | |
878 A2 += (A1 + A3 + 2)>>2; | |
879 b[x+width2] = A3; | |
880 b[x ] = A2; | |
881 | |
882 A1= temp[x+1+width2]; | |
883 A2= temp[x+2 ]; | |
884 A1 -= (A2 + A4)>>1; | |
885 A4 += (A1 + A3 + 2)>>2; | |
886 b[x+1+width2] = A1; | |
887 b[x+1 ] = A4; | |
888 } | |
889 A3= temp[width-1]; | |
890 A3 -= A2; | |
891 A2 += (A1 + A3 + 2)>>2; | |
892 b[width -1] = A3; | |
893 b[width2-1] = A2; | |
2893 | 894 } |
2967 | 895 #else |
2138 | 896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
6413 | 898 #endif /* 0 */ |
2138 | 899 } |
900 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 902 int i; |
2967 | 903 |
2138 | 904 for(i=0; i<width; i++){ |
905 b1[i] -= (b0[i] + b2[i])>>1; | |
906 } | |
907 } | |
908 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 910 int i; |
2967 | 911 |
2138 | 912 for(i=0; i<width; i++){ |
913 b1[i] += (b0[i] + b2[i] + 2)>>2; | |
914 } | |
915 } | |
916 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 918 int y; |
2138 | 919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
2967 | 921 |
2138 | 922 for(y=-2; y<height; y+=2){ |
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
925 | |
2998 | 926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
6405 | 928 |
2998 | 929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
2967 | 931 |
2138 | 932 b0=b2; |
933 b1=b3; | |
934 } | |
935 } | |
936 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
937 static void horizontal_decompose97i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
938 DWTELEM temp[width]; |
2138 | 939 const int w2= (width+1)>>1; |
940 | |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
2138 | 944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
945 } | |
946 | |
947 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 949 int i; |
2967 | 950 |
2138 | 951 for(i=0; i<width; i++){ |
952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
953 } | |
954 } | |
955 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 957 int i; |
2967 | 958 |
2138 | 959 for(i=0; i<width; i++){ |
960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
961 } | |
962 } | |
963 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 965 int i; |
2967 | 966 |
2138 | 967 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
968 #ifdef liftS |
2138 | 969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
970 #else |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
972 #endif |
2138 | 973 } |
974 } | |
975 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 977 int i; |
2967 | 978 |
2138 | 979 for(i=0; i<width; i++){ |
980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
981 } | |
982 } | |
983 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 985 int y; |
2138 | 986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
2967 | 990 |
2138 | 991 for(y=-4; y<height; y+=2){ |
992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
994 | |
2998 | 995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
6405 | 997 |
2998 | 998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
2138 | 1002 |
1003 b0=b2; | |
1004 b1=b3; | |
1005 b2=b4; | |
1006 b3=b5; | |
1007 } | |
1008 } | |
1009 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
1010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2138 | 1011 int level; |
2967 | 1012 |
2164 | 1013 for(level=0; level<decomposition_count; level++){ |
1014 switch(type){ | |
3326 | 1015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
2138 | 1017 } |
1018 } | |
1019 } | |
1020 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1021 static void horizontal_compose53i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1022 IDWTELEM temp[width]; |
2138 | 1023 const int width2= width>>1; |
1024 const int w2= (width+1)>>1; | |
2893 | 1025 int x; |
2138 | 1026 |
1027 #if 0 | |
2893 | 1028 int A1,A2,A3,A4; |
2138 | 1029 A2= temp[1 ]; |
1030 A4= temp[0 ]; | |
1031 A1= temp[0+width2]; | |
1032 A1 -= (A2 + A4)>>1; | |
1033 A4 += (A1 + 1)>>1; | |
1034 b[0+width2] = A1; | |
1035 b[0 ] = A4; | |
1036 for(x=1; x+1<width2; x+=2){ | |
1037 A3= temp[x+width2]; | |
1038 A4= temp[x+1 ]; | |
1039 A3 -= (A2 + A4)>>1; | |
1040 A2 += (A1 + A3 + 2)>>2; | |
1041 b[x+width2] = A3; | |
1042 b[x ] = A2; | |
1043 | |
1044 A1= temp[x+1+width2]; | |
1045 A2= temp[x+2 ]; | |
1046 A1 -= (A2 + A4)>>1; | |
1047 A4 += (A1 + A3 + 2)>>2; | |
1048 b[x+1+width2] = A1; | |
1049 b[x+1 ] = A4; | |
1050 } | |
1051 A3= temp[width-1]; | |
1052 A3 -= A2; | |
1053 A2 += (A1 + A3 + 2)>>2; | |
1054 b[width -1] = A3; | |
1055 b[width2-1] = A2; | |
2967 | 1056 #else |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
5910 | 1059 #endif /* 0 */ |
2138 | 1060 for(x=0; x<width2; x++){ |
1061 b[2*x ]= temp[x ]; | |
1062 b[2*x + 1]= temp[x+w2]; | |
1063 } | |
1064 if(width&1) | |
1065 b[2*x ]= temp[x ]; | |
1066 } | |
1067 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1069 int i; |
2967 | 1070 |
2138 | 1071 for(i=0; i<width; i++){ |
1072 b1[i] += (b0[i] + b2[i])>>1; | |
1073 } | |
1074 } | |
1075 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1077 int i; |
2967 | 1078 |
2138 | 1079 for(i=0; i<width; i++){ |
1080 b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1081 } | |
1082 } | |
1083 | |
8308 | 1084 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1087 cs->y = -1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1088 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1089 |
8308 | 1090 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1091 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1092 cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1093 cs->y = -1; | |
1094 } | |
1095 | |
8308 | 1096 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1097 int y= cs->y; |
2967 | 1098 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1099 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1100 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1103 |
2998 | 1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1106 |
2998 | 1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1109 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1110 cs->b0 = b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1111 cs->b1 = b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1112 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1113 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1114 |
8308 | 1115 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1116 int y= cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1117 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1118 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
2138 | 1121 |
2998 | 1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1124 |
2998 | 1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2138 | 1127 |
2562 | 1128 cs->b0 = b2; |
1129 cs->b1 = b3; | |
1130 cs->y += 2; | |
1131 } | |
1132 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ |
8308 | 1134 DWTCompose cs; |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1135 spatial_compose53i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1136 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1137 spatial_compose53i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1138 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1139 |
2967 | 1140 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1142 IDWTELEM temp[width]; |
2138 | 1143 const int w2= (width+1)>>1; |
1144 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
1146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
2138 | 1149 } |
1150 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1152 int i; |
2967 | 1153 |
2138 | 1154 for(i=0; i<width; i++){ |
1155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1156 } | |
1157 } | |
1158 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1160 int i; |
2967 | 1161 |
2138 | 1162 for(i=0; i<width; i++){ |
1163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1164 } | |
1165 } | |
1166 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1168 int i; |
2967 | 1169 |
2138 | 1170 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1171 #ifdef liftS |
2138 | 1172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1173 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1175 #endif |
2138 | 1176 } |
1177 } | |
1178 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1180 int i; |
2967 | 1181 |
2138 | 1182 for(i=0; i<width; i++){ |
1183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1184 } | |
1185 } | |
1186 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
2592 | 1188 int i; |
2967 | 1189 |
2592 | 1190 for(i=0; i<width; i++){ |
1191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
1192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1193 #ifdef liftS |
2592 | 1194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1195 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1197 #endif |
2592 | 1198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1199 } | |
1200 } | |
1201 | |
8308 | 1202 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1207 cs->y = -3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1208 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1209 |
8308 | 1210 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1211 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1212 cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1213 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1214 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1215 cs->y = -3; | |
1216 } | |
2138 | 1217 |
8308 | 1218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1219 int y = cs->y; |
2967 | 1220 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1221 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1222 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1223 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1224 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
2967 | 1227 |
2592 | 1228 if(y>0 && y+4<height){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
2592 | 1230 }else{ |
2998 | 1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2592 | 1235 } |
6405 | 1236 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1239 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1240 cs->b0=b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1241 cs->b1=b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1242 cs->b2=b4; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1243 cs->b3=b5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1244 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1245 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1246 |
8308 | 1247 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1248 int y = cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1249 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1250 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1251 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1252 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
2138 | 1255 |
2998 | 1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
6405 | 1260 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); |
2562 | 1263 |
1264 cs->b0=b2; | |
1265 cs->b1=b3; | |
1266 cs->b2=b4; | |
1267 cs->b3=b5; | |
1268 cs->y += 2; | |
1269 } | |
1270 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ |
8308 | 1272 DWTCompose cs; |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1273 spatial_compose97i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1274 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1275 spatial_compose97i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1276 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1277 |
8308 | 1278 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1279 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1280 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1281 switch(type){ |
3326 | 1282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
1283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1284 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1285 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1286 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1287 |
8308 | 1288 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1289 int level; |
1290 for(level=decomposition_count-1; level>=0; level--){ | |
1291 switch(type){ | |
3326 | 1292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
2562 | 1294 } |
1295 } | |
1296 } | |
1297 | |
8308 | 1298 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
2562 | 1299 const int support = type==1 ? 3 : 5; |
1300 int level; | |
1301 if(type==2) return; | |
1302 | |
1303 for(level=decomposition_count-1; level>=0; level--){ | |
1304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1305 switch(type){ | |
3326 | 1306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1307 break; |
3326 | 1308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1309 break; |
2562 | 1310 } |
1311 } | |
2138 | 1312 } |
1313 } | |
1314 | |
8308 | 1315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1316 const int support = type==1 ? 3 : 5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1317 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1318 if(type==2) return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1319 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1320 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1322 switch(type){ |
3326 | 1323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1324 break; |
3326 | 1325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1326 break; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1327 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1328 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1329 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1330 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1331 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
8308 | 1333 DWTCompose cs[MAX_DECOMPOSITIONS]; |
2562 | 1334 int y; |
1335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1336 for(y=0; y<height; y+=4) | |
1337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
2138 | 1338 } |
1339 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1341 const int w= b->width; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1342 const int h= b->height; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1343 int x, y; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1344 |
2138 | 1345 if(1){ |
1346 int run=0; | |
2149 | 1347 int runs[w*h]; |
2138 | 1348 int run_index=0; |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1349 int max_index; |
2967 | 1350 |
2138 | 1351 for(y=0; y<h; y++){ |
1352 for(x=0; x<w; x++){ | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1353 int v, p=0; |
2144 | 1354 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1355 v= src[x + y*stride]; |
2138 | 1356 |
1357 if(y){ | |
2149 | 1358 t= src[x + (y-1)*stride]; |
2138 | 1359 if(x){ |
2149 | 1360 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1361 } |
1362 if(x + 1 < w){ | |
2149 | 1363 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1364 } |
1365 } | |
1366 if(x){ | |
2149 | 1367 l= src[x - 1 + y*stride]; |
2144 | 1368 /*if(x > 1){ |
1369 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1370 else ll= src[x - 2 + y*stride]; | |
2138 | 1371 }*/ |
1372 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1373 if(parent){ |
2149 | 1374 int px= x>>1; |
1375 int py= y>>1; | |
2967 | 1376 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1377 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1378 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1379 if(!(/*ll|*/l|lt|t|rt|p)){ |
2138 | 1380 if(v){ |
1381 runs[run_index++]= run; | |
1382 run=0; | |
1383 }else{ | |
1384 run++; | |
1385 } | |
1386 } | |
1387 } | |
1388 } | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1389 max_index= run_index; |
2138 | 1390 runs[run_index++]= run; |
1391 run_index=0; | |
1392 run= runs[run_index++]; | |
1393 | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1394 put_symbol2(&s->c, b->state[30], max_index, 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1395 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1396 put_symbol2(&s->c, b->state[1], run, 3); |
2967 | 1397 |
2138 | 1398 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
1399 if(s->c.bytestream_end - s->c.bytestream < w*40){ |
2422 | 1400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1401 return -1; | |
1402 } | |
2138 | 1403 for(x=0; x<w; x++){ |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1404 int v, p=0; |
2144 | 1405 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1406 v= src[x + y*stride]; |
2138 | 1407 |
1408 if(y){ | |
2149 | 1409 t= src[x + (y-1)*stride]; |
2138 | 1410 if(x){ |
2149 | 1411 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1412 } |
1413 if(x + 1 < w){ | |
2149 | 1414 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1415 } |
1416 } | |
1417 if(x){ | |
2149 | 1418 l= src[x - 1 + y*stride]; |
2144 | 1419 /*if(x > 1){ |
1420 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1421 else ll= src[x - 2 + y*stride]; | |
2138 | 1422 }*/ |
1423 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1424 if(parent){ |
2149 | 1425 int px= x>>1; |
1426 int py= y>>1; | |
2967 | 1427 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1428 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1429 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1430 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1431 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
2144 | 1432 |
2335 | 1433 put_rac(&s->c, &b->state[0][context], !!v); |
2138 | 1434 }else{ |
1435 if(!run){ | |
1436 run= runs[run_index++]; | |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1437 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1438 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1439 put_symbol2(&s->c, b->state[1], run, 3); |
2138 | 1440 assert(v); |
1441 }else{ | |
1442 run--; | |
1443 assert(!v); | |
1444 } | |
1445 } | |
1446 if(v){ | |
4001 | 1447 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
1448 int l2= 2*FFABS(l) + (l<0); | |
1449 int t2= 2*FFABS(t) + (t<0); | |
1450 | |
1451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
2596 | 1452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
2138 | 1453 } |
1454 } | |
1455 } | |
1456 } | |
2422 | 1457 return 0; |
2138 | 1458 } |
1459 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1461 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1462 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
2422 | 1463 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1464 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1465 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1466 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1467 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
2138 | 1468 const int w= b->width; |
1469 const int h= b->height; | |
1470 int x,y; | |
2967 | 1471 |
2138 | 1472 if(1){ |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1473 int run, runs; |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1474 x_and_coeff *xc= b->x_coeff; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1475 x_and_coeff *prev_xc= NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1476 x_and_coeff *prev2_xc= xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1478 x_and_coeff *prev_parent_xc= parent_xc; |
2138 | 1479 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1480 runs= get_symbol2(&s->c, b->state[30], 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1482 else run= INT_MAX; |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1483 |
2138 | 1484 for(y=0; y<h; y++){ |
2193 | 1485 int v=0; |
1486 int lt=0, t=0, rt=0; | |
1487 | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1488 if(y && prev_xc->x == 0){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1489 rt= prev_xc->coeff; |
2193 | 1490 } |
2138 | 1491 for(x=0; x<w; x++){ |
2193 | 1492 int p=0; |
1493 const int l= v; | |
2967 | 1494 |
2193 | 1495 lt= t; t= rt; |
1496 | |
2194 | 1497 if(y){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1498 if(prev_xc->x <= x) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1499 prev_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1500 if(prev_xc->x == x + 1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1501 rt= prev_xc->coeff; |
2194 | 1502 else |
1503 rt=0; | |
1504 } | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1505 if(parent_xc){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1506 if(x>>1 > parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1507 parent_xc++; |
2192 | 1508 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1509 if(x>>1 == parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1510 p= parent_xc->coeff; |
2194 | 1511 } |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1512 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1513 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1514 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
2144 | 1515 |
2335 | 1516 v=get_rac(&s->c, &b->state[0][context]); |
2605 | 1517 if(v){ |
1518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
2967 | 1520 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1521 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1522 (xc++)->coeff= v; |
2605 | 1523 } |
2138 | 1524 }else{ |
1525 if(!run){ | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1527 else run= INT_MAX; |
2605 | 1528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
1529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
2967 | 1530 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1531 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1532 (xc++)->coeff= v; |
2138 | 1533 }else{ |
2606 | 1534 int max_run; |
2138 | 1535 run--; |
1536 v=0; | |
2191 | 1537 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
2606 | 1539 else max_run= FFMIN(run, w-x-1); |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1540 if(parent_xc) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); |
2606 | 1542 x+= max_run; |
1543 run-= max_run; | |
2138 | 1544 } |
1545 } | |
2192 | 1546 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1547 (xc++)->x= w+1; //end marker |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1548 prev_xc= prev2_xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1549 prev2_xc= xc; |
2967 | 1550 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1551 if(parent_xc){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1552 if(y&1){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1553 while(parent_xc->x != parent->width+1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1554 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1555 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1556 prev_parent_xc= parent_xc; |
2192 | 1557 }else{ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1558 parent_xc= prev_parent_xc; |
2138 | 1559 } |
1560 } | |
1561 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1562 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1563 (xc++)->x= w+1; //end marker |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1564 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1565 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1566 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1568 const int w= b->width; |
2893 | 1569 int y; |
4594 | 1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1573 int new_index = 0; |
2967 | 1574 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1576 qadd= 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1577 qmul= 1<<QEXPSHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1578 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1579 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1580 /* If we are on the second or later slice, restore our index. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1581 if (start_y != 0) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1582 new_index = save_state[0]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1583 |
2967 | 1584 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1585 for(y=start_y; y<h; y++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1586 int x = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1587 int v; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1589 memset(line, 0, b->width*sizeof(IDWTELEM)); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1590 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1591 x = b->x_coeff[new_index++].x; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
1592 while(x < w){ |
2596 | 1593 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1594 register int u= -(v&1); | |
1595 line[x] = (t^u) - u; | |
1596 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1597 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1598 x = b->x_coeff[new_index++].x; |
2138 | 1599 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1600 } |
2967 | 1601 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1602 /* Save our variables for the next slice. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1603 save_state[0] = new_index; |
2967 | 1604 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1605 return; |
2138 | 1606 } |
1607 | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
1608 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts |
2138 | 1609 int plane_index, level, orientation; |
1610 | |
2199 | 1611 for(plane_index=0; plane_index<3; plane_index++){ |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
1612 for(level=0; level<MAX_DECOMPOSITIONS; level++){ |
2138 | 1613 for(orientation=level ? 1:0; orientation<4; orientation++){ |
2335 | 1614 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
2138 | 1615 } |
1616 } | |
1617 } | |
2335 | 1618 memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1619 memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1620 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1621 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1622 static int alloc_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1623 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1624 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); |
2967 | 1625 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1626 s->b_width = w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1627 s->b_height= h; |
2967 | 1628 |
9370 | 1629 av_free(s->block); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1630 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1631 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1632 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1633 |
2335 | 1634 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1635 uint8_t *bytestream= d->bytestream; | |
1636 uint8_t *bytestream_start= d->bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1637 *d= *s; |
2335 | 1638 d->bytestream= bytestream; |
1639 d->bytestream_start= bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1640 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1641 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1642 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1643 static int pix_sum(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1644 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1645 int s, i, j; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1646 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1647 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1648 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1649 for (j = 0; j < w; j++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1650 s += pix[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1651 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1652 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1653 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1654 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1655 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1656 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1657 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1658 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1659 static int pix_norm1(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1660 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1661 int s, i, j; |
4179 | 1662 uint32_t *sq = ff_squareTbl + 256; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1663 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1664 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1665 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1666 for (j = 0; j < w; j ++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1667 s += sq[pix[0]]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1668 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1669 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1670 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1671 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1672 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1673 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1674 |
3314 | 1675 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1676 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1677 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1678 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1679 const int block_w= 1<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1680 BlockNode block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1681 int i,j; |
2967 | 1682 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1683 block.color[0]= l; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1684 block.color[1]= cb; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1685 block.color[2]= cr; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1686 block.mx= mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1687 block.my= my; |
3314 | 1688 block.ref= ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1689 block.type= type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1690 block.level= level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1691 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1692 for(j=0; j<block_w; j++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1693 for(i=0; i<block_w; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1694 s->block[index + i + j*w]= block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1695 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1696 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1697 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1698 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1699 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1700 const int offset[3]= { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1701 y*c-> stride + x, |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1702 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1703 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1704 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1705 int i; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1706 for(i=0; i<3; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1707 c->src[0][i]= src [i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1708 c->ref[0][i]= ref [i] + offset[i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1709 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1710 assert(!ref_index); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1711 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1712 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1713 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1714 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1715 if(s->ref_frames == 1){ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1716 *mx = mid_pred(left->mx, top->mx, tr->mx); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1717 *my = mid_pred(left->my, top->my, tr->my); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1718 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1719 const int *scale = scale_mv_ref[ref]; |
4407
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1720 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1721 (top ->mx * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1722 (tr ->mx * scale[tr ->ref] + 128) >>8); |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1723 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1724 (top ->my * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1725 (tr ->my * scale[tr ->ref] + 128) >>8); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1726 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1727 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1728 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1729 //FIXME copy&paste |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1730 #define P_LEFT P[1] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1731 #define P_TOP P[2] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1732 #define P_TOPRIGHT P[3] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1733 #define P_MEDIAN P[4] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1734 #define P_MV1 P[9] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1735 #define FLAG_QPEL 1 //must be 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1736 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1737 static int encode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1738 uint8_t p_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1739 uint8_t i_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1740 uint8_t p_state[sizeof(s->block_state)]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1741 uint8_t i_state[sizeof(s->block_state)]; |
2335 | 1742 RangeCoder pc, ic; |
1743 uint8_t *pbbak= s->c.bytestream; | |
1744 uint8_t *pbbak_start= s->c.bytestream_start; | |
5082 | 1745 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1746 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1747 const int h= s->b_height << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1748 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1749 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1750 const int block_w= 1<<(LOG2_MB_SIZE - level); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1751 int trx= (x+1)<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1752 int try= (y+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1753 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1754 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1755 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1756 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1757 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1758 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1759 int pl = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1760 int pcb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1761 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1762 int pmx, pmy; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1763 int mx=0, my=0; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1764 int l,cr,cb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1765 const int stride= s->current_picture.linesize[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1766 const int uvstride= s->current_picture.linesize[1]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1767 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1768 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1769 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1770 int P[10][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1771 int16_t last_mv[3][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1772 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1773 const int shift= 1+qpel; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1774 MotionEstContext *c= &s->m.me; |
3314 | 1775 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1776 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
1777 int my_context= av_log2(2*FFABS(left->my - top->my)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1778 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
3314 | 1779 int ref, best_ref, ref_score, ref_mx, ref_my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1780 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1781 assert(sizeof(s->block_state) >= 256); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1782 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1783 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1784 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1785 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1786 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1787 // clip predictors / edge ? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1788 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1789 P_LEFT[0]= left->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1790 P_LEFT[1]= left->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1791 P_TOP [0]= top->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1792 P_TOP [1]= top->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1793 P_TOPRIGHT[0]= tr->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1794 P_TOPRIGHT[1]= tr->my; |
2967 | 1795 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1796 last_mv[0][0]= s->block[index].mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1797 last_mv[0][1]= s->block[index].my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1798 last_mv[1][0]= right->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1799 last_mv[1][1]= right->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1800 last_mv[2][0]= bottom->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1801 last_mv[2][1]= bottom->my; |
2967 | 1802 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1803 s->m.mb_stride=2; |
2967 | 1804 s->m.mb_x= |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1805 s->m.mb_y= 0; |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1806 c->skip= 0; |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1807 |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1808 assert(c-> stride == stride); |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1809 assert(c->uvstride == uvstride); |
2967 | 1810 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1811 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1812 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1813 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1814 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; |
2967 | 1815 |
2206 | 1816 c->xmin = - x*block_w - 16+2; |
1817 c->ymin = - y*block_w - 16+2; | |
1818 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
1819 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1820 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1821 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); |
2967 | 1822 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1823 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1824 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1825 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1826 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1827 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1828 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1829 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1830 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1831 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1832 if (!y) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1833 c->pred_x= P_LEFT[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1834 c->pred_y= P_LEFT[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1835 } else { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1836 c->pred_x = P_MEDIAN[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1837 c->pred_y = P_MEDIAN[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1838 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1839 |
3314 | 1840 score= INT_MAX; |
1841 best_ref= 0; | |
1842 for(ref=0; ref<s->ref_frames; ref++){ | |
1843 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); | |
1844 | |
1845 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, | |
1846 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
1847 | |
1848 assert(ref_mx >= c->xmin); | |
1849 assert(ref_mx <= c->xmax); | |
1850 assert(ref_my >= c->ymin); | |
1851 assert(ref_my <= c->ymax); | |
1852 | |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1853 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
3314 | 1854 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); |
1855 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; | |
1856 if(s->ref_mvs[ref]){ | |
1857 s->ref_mvs[ref][index][0]= ref_mx; | |
1858 s->ref_mvs[ref][index][1]= ref_my; | |
1859 s->ref_scores[ref][index]= ref_score; | |
1860 } | |
1861 if(score > ref_score){ | |
1862 score= ref_score; | |
1863 best_ref= ref; | |
1864 mx= ref_mx; | |
1865 my= ref_my; | |
1866 } | |
1867 } | |
5127 | 1868 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 |
2967 | 1869 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1870 // subpel search |
5085 | 1871 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1872 pc= s->c; |
2335 | 1873 pc.bytestream_start= |
1874 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1875 memcpy(p_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1876 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1877 if(level!=s->block_max_depth) |
2335 | 1878 put_rac(&pc, &p_state[4 + s_context], 1); |
1879 put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
3314 | 1880 if(s->ref_frames > 1) |
1881 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1882 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); |
3314 | 1883 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); |
1884 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); | |
2335 | 1885 p_len= pc.bytestream - pc.bytestream_start; |
5082 | 1886 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1887 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1888 block_s= block_w*block_w; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1889 sum = pix_sum(current_data[0], stride, block_w); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1890 l= (sum + block_s/2)/block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1891 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
2967 | 1892 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1893 block_s= block_w*block_w>>2; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1894 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1895 cb= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1896 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1897 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1898 cr= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1899 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1900 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1901 ic= s->c; |
2335 | 1902 ic.bytestream_start= |
1903 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1904 memcpy(i_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1905 if(level!=s->block_max_depth) |
2335 | 1906 put_rac(&ic, &i_state[4 + s_context], 1); |
1907 put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1908 put_symbol(&ic, &i_state[32], l-pl , 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1909 put_symbol(&ic, &i_state[64], cb-pcb, 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1910 put_symbol(&ic, &i_state[96], cr-pcr, 1); |
2335 | 1911 i_len= ic.bytestream - ic.bytestream_start; |
5082 | 1912 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1913 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1914 // assert(score==256*256*256*64-1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1915 assert(iscore < 255*255*256 + s->lambda2*10); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1916 assert(iscore >= 0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1917 assert(l>=0 && l<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1918 assert(pl>=0 && pl<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1919 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1920 if(level==0){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1921 int varc= iscore >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1922 int vard= score >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1923 if (vard <= 64 || vard < varc) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1924 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1925 else |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1926 c->scene_change_score+= s->m.qscale; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1927 } |
2967 | 1928 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1929 if(level!=s->block_max_depth){ |
2335 | 1930 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1931 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1933 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1934 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1935 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead |
2967 | 1936 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1937 if(score2 < score && score2 < iscore) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1938 return score2; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1939 } |
2967 | 1940 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1941 if(iscore < score){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1942 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2335 | 1943 memcpy(pbbak, i_buffer, i_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1944 s->c= ic; |
2335 | 1945 s->c.bytestream_start= pbbak_start; |
1946 s->c.bytestream= pbbak + i_len; | |
3314 | 1947 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1948 memcpy(s->block_state, i_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1949 return iscore; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1950 }else{ |
2335 | 1951 memcpy(pbbak, p_buffer, p_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1952 s->c= pc; |
2335 | 1953 s->c.bytestream_start= pbbak_start; |
1954 s->c.bytestream= pbbak + p_len; | |
3314 | 1955 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1956 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1957 return score; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1958 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1959 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1960 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
1961 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1962 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1963 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1964 }else{ |
3314 | 1965 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1966 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1967 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1968 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1969 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1970 const int w= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1971 const int rem_depth= s->block_max_depth - level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1972 const int index= (x + y*w) << rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1973 int trx= (x+1)<<rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1974 BlockNode *b= &s->block[index]; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1975 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1976 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1977 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1978 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1979 int pl = left->color[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1980 int pcb= left->color[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1981 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1982 int pmx, pmy; |
3314 | 1983 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1984 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; |
1985 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1986 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1987 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1988 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1989 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1990 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1991 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1992 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1993 if(level!=s->block_max_depth){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1994 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ |
2995
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1995 put_rac(&s->c, &s->block_state[4 + s_context], 1); |
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1996 }else{ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1997 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2000 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2001 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2002 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2003 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2004 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2005 if(b->type & BLOCK_INTRA){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2006 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2007 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2008 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2009 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2010 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); |
3314 | 2011 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2012 }else{ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2013 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2014 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); |
3314 | 2015 if(s->ref_frames > 1) |
2016 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2017 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2018 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
3314 | 2019 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2020 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2021 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2022 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2023 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2024 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2025 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2026 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2027 int trx= (x+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2028 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2029 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2030 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2031 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2032 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
2967 | 2033 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2034 if(s->keyframe){ |
3314 | 2035 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2036 return; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2037 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2038 |
2335 | 2039 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
4332 | 2040 int type, mx, my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2041 int l = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2042 int cb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2043 int cr= left->color[2]; |
3314 | 2044 int ref = 0; |
2045 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
4001 | 2046 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); |
2047 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
2967 | 2048 |
2335 | 2049 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2050 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2051 if(type){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2052 pred_mv(s, &mx, &my, 0, left, top, tr); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2053 l += get_symbol(&s->c, &s->block_state[32], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2054 cb+= get_symbol(&s->c, &s->block_state[64], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2055 cr+= get_symbol(&s->c, &s->block_state[96], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2056 }else{ |
3314 | 2057 if(s->ref_frames > 1) |
2058 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2059 pred_mv(s, &mx, &my, ref, left, top, tr); |
3314 | 2060 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); |
2061 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2062 } |
3314 | 2063 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2064 }else{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2065 decode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2066 decode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2067 decode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2068 decode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2069 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2070 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2071 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2072 static void encode_blocks(SnowContext *s, int search){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2073 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2074 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2075 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2076 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2077 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2078 iterative_me(s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2079 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2080 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
2081 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
2422 | 2082 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2083 return; | |
2084 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2085 for(x=0; x<w; x++){ |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2086 if(s->avctx->me_method == ME_ITER || !search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2087 encode_q_branch2(s, 0, x, y); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2088 else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2089 encode_q_branch (s, 0, x, y); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2090 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2091 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2092 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2093 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2094 static void decode_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2095 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2096 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2097 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2098 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2099 for(y=0; y<h; y++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2100 for(x=0; x<w; x++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2101 decode_q_branch(s, 0, x, y); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2102 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2103 } |
2138 | 2104 } |
2105 | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
7622 | 2107 static const uint8_t weight[64]={ |
5648 | 2108 8,7,6,5,4,3,2,1, |
2109 7,7,0,0,0,0,0,1, | |
2110 6,0,6,0,0,0,2,0, | |
2111 5,0,0,5,0,3,0,0, | |
2112 4,0,0,0,4,0,0,0, | |
2113 3,0,0,5,0,3,0,0, | |
2114 2,0,6,0,0,0,2,0, | |
2115 1,7,0,0,0,0,0,1, | |
2116 }; | |
2117 | |
7622 | 2118 static const uint8_t brane[256]={ |
5648 | 2119 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, |
2120 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
2121 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
2122 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
2123 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
2124 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
2125 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
2126 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
2127 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
2128 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
2129 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
2130 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
2131 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
2132 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
2133 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
2134 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
2135 }; | |
2136 | |
7622 | 2137 static const uint8_t needs[16]={ |
5648 | 2138 0,1,0,0, |
2139 2,4,2,0, | |
2140 0,1,0,0, | |
2141 15 | |
2142 }; | |
2143 | |
2144 int x, y, b, r, l; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2145 int16_t tmpIt [64*(32+HTAPS_MAX)]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; |
5648 | 2147 int16_t *tmpI= tmpIt; |
2148 uint8_t *tmp2= tmp2t[0]; | |
6205 | 2149 const uint8_t *hpel[11]; |
5648 | 2150 assert(dx<16 && dy<16); |
2151 r= brane[dx + 16*dy]&15; | |
2152 l= brane[dx + 16*dy]>>4; | |
2153 | |
2154 b= needs[l] | needs[r]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2155 if(p && !p->diag_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2156 b= 15; |
5648 | 2157 |
2158 if(b&5){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2159 for(y=0; y < b_h+HTAPS_MAX-1; y++){ |
5649 | 2160 for(x=0; x < b_w; x++){ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2161 int a_1=src[x + HTAPS_MAX/2-4]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2162 int a0= src[x + HTAPS_MAX/2-3]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2163 int a1= src[x + HTAPS_MAX/2-2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2164 int a2= src[x + HTAPS_MAX/2-1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2165 int a3= src[x + HTAPS_MAX/2+0]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2166 int a4= src[x + HTAPS_MAX/2+1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2167 int a5= src[x + HTAPS_MAX/2+2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2168 int a6= src[x + HTAPS_MAX/2+3]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2169 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2170 if(!p || p->fast_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2171 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2172 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2173 am= (am+16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2174 }else{ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2175 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2176 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2177 am= (am+32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2178 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2179 |
5649 | 2180 if(am&(~255)) am= ~(am>>31); |
2181 tmp2[x]= am; | |
2182 } | |
2183 tmpI+= 64; | |
2184 tmp2+= stride; | |
2185 src += stride; | |
2138 | 2186 } |
5649 | 2187 src -= stride*y; |
5648 | 2188 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2189 src += HTAPS_MAX/2 - 1; |
5648 | 2190 tmp2= tmp2t[1]; |
2191 | |
2192 if(b&2){ | |
5649 | 2193 for(y=0; y < b_h; y++){ |
2194 for(x=0; x < b_w+1; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2195 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2196 int a0= src[x + (HTAPS_MAX/2-3)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2197 int a1= src[x + (HTAPS_MAX/2-2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2198 int a2= src[x + (HTAPS_MAX/2-1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2199 int a3= src[x + (HTAPS_MAX/2+0)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2200 int a4= src[x + (HTAPS_MAX/2+1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2201 int a5= src[x + (HTAPS_MAX/2+2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2202 int a6= src[x + (HTAPS_MAX/2+3)*stride]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2203 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2204 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2205 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2206 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2207 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2208 |
5649 | 2209 if(am&(~255)) am= ~(am>>31); |
2210 tmp2[x]= am; | |
2211 } | |
2212 src += stride; | |
2213 tmp2+= stride; | |
5648 | 2214 } |
5649 | 2215 src -= stride*y; |
5648 | 2216 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2217 src += stride*(HTAPS_MAX/2 - 1); |
5648 | 2218 tmp2= tmp2t[2]; |
2219 tmpI= tmpIt; | |
2220 if(b&4){ | |
2221 for(y=0; y < b_h; y++){ | |
2222 for(x=0; x < b_w; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2223 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2224 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2225 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2226 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2227 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2228 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2229 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2230 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2231 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2232 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2233 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2234 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2235 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; |
5648 | 2236 if(am&(~255)) am= ~(am>>31); |
2237 tmp2[x]= am; | |
2238 } | |
2239 tmpI+= 64; | |
2240 tmp2+= stride; | |
2138 | 2241 } |
5648 | 2242 } |
2243 | |
2244 hpel[ 0]= src; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2245 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); |
5648 | 2246 hpel[ 2]= src + 1; |
2247 | |
2248 hpel[ 4]= tmp2t[1]; | |
2249 hpel[ 5]= tmp2t[2]; | |
2250 hpel[ 6]= tmp2t[1] + 1; | |
2251 | |
2252 hpel[ 8]= src + stride; | |
2253 hpel[ 9]= hpel[1] + stride; | |
2254 hpel[10]= hpel[8] + 1; | |
2255 | |
2256 if(b==15){ | |
6205 | 2257 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; |
2258 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
2259 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
2260 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
5648 | 2261 dx&=7; |
2262 dy&=7; | |
2263 for(y=0; y < b_h; y++){ | |
2264 for(x=0; x < b_w; x++){ | |
2265 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
2266 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
2267 } | |
2268 src1+=stride; | |
2269 src2+=stride; | |
2270 src3+=stride; | |
2271 src4+=stride; | |
2272 dst +=stride; | |
2273 } | |
2274 }else{ | |
6205 | 2275 const uint8_t *src1= hpel[l]; |
2276 const uint8_t *src2= hpel[r]; | |
5648 | 2277 int a= weight[((dx&7) + (8*(dy&7)))]; |
2278 int b= 8-a; | |
2279 for(y=0; y < b_h; y++){ | |
2280 for(x=0; x < b_w; x++){ | |
2281 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
2282 } | |
2283 src1+=stride; | |
2284 src2+=stride; | |
2285 dst +=stride; | |
2286 } | |
2138 | 2287 } |
2288 } | |
2289 | |
2290 #define mca(dx,dy,b_w)\ | |
5254 | 2291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ |
2138 | 2293 assert(h==b_w);\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2294 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ |
2138 | 2295 } |
2296 | |
2297 mca( 0, 0,16) | |
2298 mca( 8, 0,16) | |
2299 mca( 0, 8,16) | |
2300 mca( 8, 8,16) | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2301 mca( 0, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2302 mca( 8, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2303 mca( 0, 8,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2304 mca( 8, 8,8) |
2138 | 2305 |
3314 | 2306 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2307 if(block->type & BLOCK_INTRA){ |
2206 | 2308 int x, y; |
3018 | 2309 const int color = block->color[plane_index]; |
2310 const int color4= color*0x01010101; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2311 if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2312 for(y=0; y < b_h; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2313 *(uint32_t*)&dst[0 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2314 *(uint32_t*)&dst[4 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2315 *(uint32_t*)&dst[8 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2316 *(uint32_t*)&dst[12+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2317 *(uint32_t*)&dst[16+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2318 *(uint32_t*)&dst[20+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2319 *(uint32_t*)&dst[24+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2320 *(uint32_t*)&dst[28+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2321 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2322 }else if(b_w==16){ |
3018 | 2323 for(y=0; y < b_h; y++){ |
2324 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2325 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2326 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2327 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2328 } | |
2329 }else if(b_w==8){ | |
2330 for(y=0; y < b_h; y++){ | |
2331 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2332 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2333 } | |
2334 }else if(b_w==4){ | |
2335 for(y=0; y < b_h; y++){ | |
2336 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2337 } | |
2338 }else{ | |
2339 for(y=0; y < b_h; y++){ | |
2340 for(x=0; x < b_w; x++){ | |
2341 dst[x + y*stride]= color; | |
2342 } | |
2138 | 2343 } |
2344 } | |
2345 }else{ | |
3314 | 2346 uint8_t *src= s->last_picture[block->ref].data[plane_index]; |
2206 | 2347 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; |
2348 int mx= block->mx*scale; | |
2349 int my= block->my*scale; | |
2223 | 2350 const int dx= mx&15; |
2351 const int dy= my&15; | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2352 const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2353 sx += (mx>>4) - (HTAPS_MAX/2-1); |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2354 sy += (my>>4) - (HTAPS_MAX/2-1); |
2206 | 2355 src += sx + sy*stride; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2356 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2357 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2358 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); |
2206 | 2359 src= tmp + MB_SIZE; |
2138 | 2360 } |
3189 | 2361 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2362 // assert(!(b_w&(b_w-1))); | |
3018 | 2363 assert(b_w>1 && b_h>1); |
6206
ea591bc7522a
add parenthesis, fix warning: snow.c:2413: warning: suggest parentheses around && within ||
bcoudurier
parents:
6205
diff
changeset
|
2364 assert((tab_index>=0 && tab_index<4) || b_w==32); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2365 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2366 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2367 else if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2368 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2369 for(y=0; y<b_h; y+=16){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2371 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2372 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2373 }else if(b_w==b_h) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); |
3018 | 2375 else if(b_w==2*b_h){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); |
3018 | 2378 }else{ |
2379 assert(2*b_w==b_h); | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2381 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); |
3018 | 2382 } |
2138 | 2383 } |
2384 } | |
2385 | |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2386 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2387 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2388 int y, x; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2389 IDWTELEM * dst; |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2390 for(y=0; y<b_h; y++){ |
5409 | 2391 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2392 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2393 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2394 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2395 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2396 dst = slice_buffer_get_line(sb, src_y + y); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2397 for(x=0; x<b_w; x++){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2398 int v= obmc1[x] * block[3][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2399 +obmc2[x] * block[2][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2400 +obmc3[x] * block[1][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2401 +obmc4[x] * block[0][x + y*src_stride]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2402 |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2403 v <<= 8 - LOG2_OBMC_MAX; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2404 if(FRAC_BITS != 8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2405 v >>= 8 - FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2406 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2407 if(add){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2408 v += dst[x + src_x]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2409 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2410 if(v&(~255)) v= ~(v>>31); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2411 dst8[x + y*src_stride] = v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2412 }else{ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2413 dst[x + src_x] -= v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2414 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2415 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2416 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2417 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2418 |
6412 | 2419 //FIXME name cleanup (b_w, block_w, b_width stuff) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2420 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2421 const int b_width = s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2422 const int b_height= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2423 const int b_stride= b_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2424 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2425 BlockNode *rt= lt+1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2426 BlockNode *lb= lt+b_stride; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2427 BlockNode *rb= lb+1; |
2967 | 2428 uint8_t *block[4]; |
2842 | 2429 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
2430 uint8_t *tmp = s->scratchbuf; |
2842 | 2431 uint8_t *ptmp; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2432 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2433 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2434 if(b_x<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2435 lt= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2436 lb= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2437 }else if(b_x + 1 >= b_width){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2438 rt= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2439 rb= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2440 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2441 if(b_y<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2442 lt= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2443 rt= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2444 }else if(b_y + 1 >= b_height){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2445 lb= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2446 rb= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2447 } |
2967 | 2448 |
6412 | 2449 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2450 obmc -= src_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2451 b_w += src_x; |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2452 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2453 dst -= src_x; |
2206 | 2454 src_x=0; |
2455 }else if(src_x + b_w > w){ | |
2456 b_w = w - src_x; | |
2457 } | |
2458 if(src_y<0){ | |
2459 obmc -= src_y*obmc_stride; | |
2460 b_h += src_y; | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2461 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2462 dst -= src_y*dst_stride; |
2206 | 2463 src_y=0; |
2464 }else if(src_y + b_h> h){ | |
2465 b_h = h - src_y; | |
2466 } | |
2967 | 2467 |
2206 | 2468 if(b_w<=0 || b_h<=0) return; |
2469 | |
6414 | 2470 assert(src_stride > 2*MB_SIZE + 5); |
2471 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2472 if(!sliced && offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2473 dst += src_x + src_y*dst_stride; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2474 dst8+= src_x + src_y*src_stride; |
2206 | 2475 // src += src_x + src_y*src_stride; |
2476 | |
2842 | 2477 ptmp= tmp + 3*tmp_step; |
2478 block[0]= ptmp; | |
2479 ptmp+=tmp_step; | |
3314 | 2480 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
2206 | 2481 |
2482 if(same_block(lt, rt)){ | |
2483 block[1]= block[0]; | |
2484 }else{ | |
2842 | 2485 block[1]= ptmp; |
2486 ptmp+=tmp_step; | |
3314 | 2487 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2206 | 2488 } |
2967 | 2489 |
2206 | 2490 if(same_block(lt, lb)){ |
2491 block[2]= block[0]; | |
2492 }else if(same_block(rt, lb)){ | |
2493 block[2]= block[1]; | |
2494 }else{ | |
2842 | 2495 block[2]= ptmp; |
2496 ptmp+=tmp_step; | |
3314 | 2497 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2206 | 2498 } |
2499 | |
2500 if(same_block(lt, rb) ){ | |
2501 block[3]= block[0]; | |
2502 }else if(same_block(rt, rb)){ | |
2503 block[3]= block[1]; | |
2504 }else if(same_block(lb, rb)){ | |
2505 block[3]= block[2]; | |
2506 }else{ | |
2842 | 2507 block[3]= ptmp; |
3314 | 2508 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2206 | 2509 } |
2510 #if 0 | |
2511 for(y=0; y<b_h; y++){ | |
2512 for(x=0; x<b_w; x++){ | |
2513 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2514 if(add) dst[x + y*dst_stride] += v; | |
2515 else dst[x + y*dst_stride] -= v; | |
2516 } | |
2517 } | |
2518 for(y=0; y<b_h; y++){ | |
2519 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2520 for(x=0; x<b_w; x++){ | |
2521 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2522 if(add) dst[x + y*dst_stride] += v; | |
2523 else dst[x + y*dst_stride] -= v; | |
2524 } | |
2525 } | |
2526 for(y=0; y<b_h; y++){ | |
2527 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2528 for(x=0; x<b_w; x++){ | |
2529 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2530 if(add) dst[x + y*dst_stride] += v; | |
2531 else dst[x + y*dst_stride] -= v; | |
2532 } | |
2533 } | |
2534 for(y=0; y<b_h; y++){ | |
2535 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2536 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2537 for(x=0; x<b_w; x++){ | |
2538 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2539 if(add) dst[x + y*dst_stride] += v; | |
2540 else dst[x + y*dst_stride] -= v; | |
2541 } | |
2542 } | |
2543 #else | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2544 if(sliced){ |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2545 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
6475 | 2546 }else{ |
2547 for(y=0; y<b_h; y++){ | |
2548 //FIXME ugly misuse of obmc_stride | |
2549 const uint8_t *obmc1= obmc + y*obmc_stride; | |
2550 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2551 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2552 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2553 for(x=0; x<b_w; x++){ | |
2554 int v= obmc1[x] * block[3][x + y*src_stride] | |
2555 +obmc2[x] * block[2][x + y*src_stride] | |
2556 +obmc3[x] * block[1][x + y*src_stride] | |
2557 +obmc4[x] * block[0][x + y*src_stride]; | |
2558 | |
2559 v <<= 8 - LOG2_OBMC_MAX; | |
2560 if(FRAC_BITS != 8){ | |
2561 v >>= 8 - FRAC_BITS; | |
2562 } | |
2563 if(add){ | |
2564 v += dst[x + y*dst_stride]; | |
2565 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2566 if(v&(~255)) v= ~(v>>31); | |
2567 dst8[x + y*src_stride] = v; | |
2568 }else{ | |
2569 dst[x + y*dst_stride] -= v; | |
2570 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2571 } |
2206 | 2572 } |
2573 } | |
5910 | 2574 #endif /* 0 */ |
2206 | 2575 } |
2576 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2577 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2578 Plane *p= &s->plane[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2579 const int mb_w= s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2580 const int mb_h= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2581 int x, y, mb_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2582 int block_size = MB_SIZE >> s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2583 int block_w = plane_index ? block_size/2 : block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2584 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2585 int obmc_stride= plane_index ? block_size : 2*block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2586 int ref_stride= s->current_picture.linesize[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2587 uint8_t *dst8= s->current_picture.data[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2588 int w= p->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2589 int h= p->height; |
2967 | 2590 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2591 if(s->keyframe || (s->avctx->debug&512)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2592 if(mb_y==mb_h) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2593 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2594 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2595 if(add){ |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2596 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2597 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2598 IDWTELEM * line = sb->line[y]; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2599 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2600 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2601 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2602 v >>= FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2603 if(v&(~255)) v= ~(v>>31); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2604 dst8[x + y*ref_stride]= v; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2605 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2606 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2607 }else{ |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2608 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2609 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2610 IDWTELEM * line = sb->line[y]; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2611 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2612 line[x] -= 128 << FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2613 // buf[x + y*w]-= 128<<FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2614 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2615 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2616 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2617 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2618 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2619 } |
2967 | 2620 |
6475 | 2621 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2622 add_yblock(s, 1, sb, old_buffer, dst8, obmc, | |
2623 block_w*mb_x - block_w/2, | |
2624 block_w*mb_y - block_w/2, | |
2625 block_w, block_w, | |
2626 w, h, | |
2627 w, ref_stride, obmc_stride, | |
2628 mb_x - 1, mb_y - 1, | |
2629 add, 0, plane_index); | |
2630 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2631 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2632 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2633 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2138 | 2634 Plane *p= &s->plane[plane_index]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2635 const int mb_w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2636 const int mb_h= s->b_height << s->block_max_depth; |
2562 | 2637 int x, y, mb_x; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2638 int block_size = MB_SIZE >> s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2639 int block_w = plane_index ? block_size/2 : block_size; |
2206 | 2640 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2641 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2642 int ref_stride= s->current_picture.linesize[plane_index]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2643 uint8_t *dst8= s->current_picture.data[plane_index]; |
2138 | 2644 int w= p->width; |
2645 int h= p->height; | |
2967 | 2646 |
2206 | 2647 if(s->keyframe || (s->avctx->debug&512)){ |
2562 | 2648 if(mb_y==mb_h) |
2649 return; | |
2650 | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2651 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2652 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2653 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2654 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2655 v >>= FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2656 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2657 dst8[x + y*ref_stride]= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2658 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2659 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2660 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2662 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2663 buf[x + y*w]-= 128<<FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2664 } |
2206 | 2665 } |
2138 | 2666 } |
2206 | 2667 |
2668 return; | |
2138 | 2669 } |
2967 | 2670 |
6414 | 2671 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2672 add_yblock(s, 0, NULL, buf, dst8, obmc, | |
2673 block_w*mb_x - block_w/2, | |
2674 block_w*mb_y - block_w/2, | |
2675 block_w, block_w, | |
2676 w, h, | |
2677 w, ref_stride, obmc_stride, | |
2678 mb_x - 1, mb_y - 1, | |
2679 add, 1, plane_index); | |
2680 } | |
2562 | 2681 } |
2682 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2683 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2562 | 2684 const int mb_h= s->b_height << s->block_max_depth; |
2685 int mb_y; | |
2686 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2687 predict_slice(s, buf, plane_index, add, mb_y); | |
2138 | 2688 } |
2689 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2690 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2691 int i, x2, y2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2692 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2693 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2694 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2695 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2696 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2697 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2698 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2699 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2700 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2701 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2702 const int h= p->height; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2703 int index= mb_x + mb_y*b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2704 BlockNode *b= &s->block[index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2705 BlockNode backup= *b; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2706 int ab=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2707 int aa=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2708 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2709 b->type|= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2710 b->color[plane_index]= 0; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2711 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2712 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2713 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2714 int mb_x2= mb_x + (i &1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2715 int mb_y2= mb_y + (i>>1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2716 int x= block_w*mb_x2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2717 int y= block_w*mb_y2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2718 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2719 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2720 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2721 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2722 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2723 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2724 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2725 int obmc_v= obmc[index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2726 int d; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2727 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2728 if(x<0) obmc_v += obmc[index + block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2729 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2730 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
6412 | 2731 //FIXME precalculate this or simplify it somehow else |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2732 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2733 d = -dst[index] + (1<<(FRAC_BITS-1)); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2734 dst[index] = d; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2735 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; |
6412 | 2736 aa += obmc_v * obmc_v; //FIXME precalculate this |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2737 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2738 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2739 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2740 *b= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2741 |
5127 | 2742 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2743 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2744 |
3051 | 2745 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
2746 const int b_stride = s->b_width << s->block_max_depth; | |
2747 const int b_height = s->b_height<< s->block_max_depth; | |
2748 int index= x + y*b_stride; | |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2749 const BlockNode *b = &s->block[index]; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2750 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2751 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2752 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2753 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; |
3051 | 2754 int dmx, dmy; |
4001 | 2755 // int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
2756 // int my_context= av_log2(2*FFABS(left->my - top->my)); | |
3051 | 2757 |
2758 if(x<0 || x>=b_stride || y>=b_height) | |
2759 return 0; | |
2760 /* | |
2761 1 0 0 | |
2762 01X 1-2 1 | |
2763 001XX 3-6 2-3 | |
2764 0001XXX 7-14 4-7 | |
2765 00001XXXX 15-30 8-15 | |
2766 */ | |
2767 //FIXME try accurate rate | |
6412 | 2768 //FIXME intra and inter predictors if surrounding blocks are not the same type |
3051 | 2769 if(b->type & BLOCK_INTRA){ |
4001 | 2770 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) |
2771 + av_log2(2*FFABS(left->color[1] - b->color[1])) | |
2772 + av_log2(2*FFABS(left->color[2] - b->color[2]))); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2773 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2774 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2775 dmx-= b->mx; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2776 dmy-= b->my; |
4001 | 2777 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda |
2778 + av_log2(2*FFABS(dmy)) | |
3314 | 2779 + av_log2(2*b->ref)); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2780 } |
3051 | 2781 } |
2782 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2783 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2784 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2785 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2786 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2787 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2788 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2789 uint8_t *dst= s->current_picture.data[plane_index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2790 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2791 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
2792 uint8_t *cur = s->scratchbuf; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2793 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2794 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2795 const int b_height = s->b_height<< s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2796 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2797 const int h= p->height; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2798 int distortion; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2799 int rate= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2800 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2801 int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2802 int sy= block_w*mb_y - block_w/2; |
3206 | 2803 int x0= FFMAX(0,-sx); |
2804 int y0= FFMAX(0,-sy); | |
2805 int x1= FFMIN(block_w*2, w-sx); | |
2806 int y1= FFMIN(block_w*2, h-sy); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2807 int i,x,y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2808 |
3314 | 2809 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2810 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2811 for(y=y0; y<y1; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2812 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2813 const IDWTELEM *pred1 = pred + y*obmc_stride; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2814 uint8_t *cur1 = cur + y*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2815 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2816 for(x=x0; x<x1; x++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2817 #if FRAC_BITS >= LOG2_OBMC_MAX |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2818 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2819 #else |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2820 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2821 #endif |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2822 v = (v + pred1[x]) >> FRAC_BITS; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2823 if(v&(~255)) v= ~(v>>31); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2824 dst1[x] = v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2825 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2826 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2827 |
3206 | 2828 /* copy the regions where obmc[] = (uint8_t)256 */ |
2829 if(LOG2_OBMC_MAX == 8 | |
2830 && (mb_x == 0 || mb_x == b_stride-1) | |
2831 && (mb_y == 0 || mb_y == b_height-1)){ | |
2832 if(mb_x == 0) | |
2833 x1 = block_w; | |
2834 else | |
2835 x0 = block_w; | |
2836 if(mb_y == 0) | |
2837 y1 = block_w; | |
2838 else | |
2839 y0 = block_w; | |
2840 for(y=y0; y<y1; y++) | |
2841 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); | |
2842 } | |
2843 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2844 if(block_w==16){ |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2845 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2846 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ |
6412 | 2847 /* FIXME cmps overlap but do not cover the wavelet's whole support. |
2848 * So improving the score of one block is not strictly guaranteed | |
2849 * to improve the score of the whole frame, thus iterative motion | |
2850 * estimation does not always converge. */ | |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2851 if(s->avctx->me_cmp == FF_CMP_W97) |
4197 | 2852 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2853 else if(s->avctx->me_cmp == FF_CMP_W53) |
4197 | 2854 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2855 else{ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2856 distortion = 0; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2857 for(i=0; i<4; i++){ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2858 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2859 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2860 } |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2861 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2862 }else{ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2863 assert(block_w==8); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2864 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2865 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2866 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2867 if(plane_index==0){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2868 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2869 /* ..RRr |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2870 * .RXx. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2871 * rxx.. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2872 */ |
3051 | 2873 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2874 } |
3057 | 2875 if(mb_x == b_stride-2) |
2876 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2877 } |
3051 | 2878 return distortion + rate*penalty_factor; |
2879 } | |
2880 | |
2881 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
2882 int i, y2; | |
2883 Plane *p= &s->plane[plane_index]; | |
2884 const int block_size = MB_SIZE >> s->block_max_depth; | |
2885 const int block_w = plane_index ? block_size/2 : block_size; | |
2886 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2887 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2888 const int ref_stride= s->current_picture.linesize[plane_index]; | |
2889 uint8_t *dst= s->current_picture.data[plane_index]; | |
2890 uint8_t *src= s-> input_picture.data[plane_index]; | |
6426 | 2891 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst |
2892 // const has only been removed from zero_dst to suppress a warning | |
2893 static IDWTELEM zero_dst[4096]; //FIXME | |
3051 | 2894 const int b_stride = s->b_width << s->block_max_depth; |
2895 const int w= p->width; | |
2896 const int h= p->height; | |
2897 int distortion= 0; | |
2898 int rate= 0; | |
2899 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
2900 | |
2901 for(i=0; i<9; i++){ | |
2902 int mb_x2= mb_x + (i%3) - 1; | |
2903 int mb_y2= mb_y + (i/3) - 1; | |
2904 int x= block_w*mb_x2 + block_w/2; | |
2905 int y= block_w*mb_y2 + block_w/2; | |
2906 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2907 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
3051 | 2908 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
2909 | |
2910 //FIXME find a cleaner/simpler way to skip the outside stuff | |
2911 for(y2= y; y2<0; y2++) | |
2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2913 for(y2= h; y2<y+block_w; y2++) | |
2914 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2915 if(x<0){ | |
2916 for(y2= y; y2<y+block_w; y2++) | |
2917 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
2918 } | |
2919 if(x+block_w > w){ | |
2920 for(y2= y; y2<y+block_w; y2++) | |
2921 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
2922 } | |
2923 | |
2924 assert(block_w== 8 || block_w==16); | |
2925 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
2926 } | |
2927 | |
2928 if(plane_index==0){ | |
2929 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
2930 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
2931 | |
2932 /* ..RRRr | |
2933 * .RXXx. | |
2934 * .RXXx. | |
2935 * rxxx. | |
2936 */ | |
2937 if(merged) | |
2938 rate = get_block_bits(s, mb_x, mb_y, 2); | |
2939 for(i=merged?4:0; i<9; i++){ | |
2940 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
2941 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
2942 } | |
2943 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2944 return distortion + rate*penalty_factor; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2945 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2946 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2947 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2948 const int b_stride= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2949 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2950 BlockNode backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2951 int rd, index, value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2952 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2953 assert(mb_x>=0 && mb_y>=0); |
2994 | 2954 assert(mb_x<b_stride); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2955 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2956 if(intra){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2957 block->color[0] = p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2958 block->color[1] = p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2959 block->color[2] = p[2]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2960 block->type |= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2961 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2962 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); |
3314 | 2963 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2964 if(s->me_cache[index] == value) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2965 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2966 s->me_cache[index]= value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2967 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2968 block->mx= p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2969 block->my= p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2970 block->type &= ~BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2971 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2972 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2973 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2974 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2975 //FIXME chroma |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2976 if(rd < *best_rd){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2977 *best_rd= rd; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2978 return 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2979 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2980 *block= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2981 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2982 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2983 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2984 |
6412 | 2985 /* special case for int[2] args we discard afterwards, |
2986 * fixes compilation problem with gcc 2.95 */ | |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2987 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2988 int p[2] = {p0, p1}; |
3197 | 2989 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2990 } |
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2991 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2992 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ |
3051 | 2993 const int b_stride= s->b_width << s->block_max_depth; |
2994 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
2995 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
2996 int rd, index, value; | |
2997 | |
2998 assert(mb_x>=0 && mb_y>=0); | |
2999 assert(mb_x<b_stride); | |
3000 assert(((mb_x|mb_y)&1) == 0); | |
3001 | |
3002 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
3314 | 3003 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); |
3051 | 3004 if(s->me_cache[index] == value) |
3005 return 0; | |
3006 s->me_cache[index]= value; | |
3007 | |
3008 block->mx= p0; | |
3009 block->my= p1; | |
3314 | 3010 block->ref= ref; |
3051 | 3011 block->type &= ~BLOCK_INTRA; |
3012 block[1]= block[b_stride]= block[b_stride+1]= *block; | |
3013 | |
3014 rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3015 | |
3016 //FIXME chroma | |
3017 if(rd < *best_rd){ | |
3018 *best_rd= rd; | |
3019 return 1; | |
3020 }else{ | |
3021 block[0]= backup[0]; | |
3022 block[1]= backup[1]; | |
3023 block[b_stride]= backup[2]; | |
3024 block[b_stride+1]= backup[3]; | |
3025 return 0; | |
3026 } | |
3027 } | |
3028 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3029 static void iterative_me(SnowContext *s){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3030 int pass, mb_x, mb_y; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3031 const int b_width = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3032 const int b_height= s->b_height << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3033 const int b_stride= b_width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3034 int color[3]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3035 |
3194
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3036 { |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3037 RangeCoder r = s->c; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3038 uint8_t state[sizeof(s->block_state)]; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3039 memcpy(state, s->block_state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3040 for(mb_y= 0; mb_y<s->b_height; mb_y++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3041 for(mb_x= 0; mb_x<s->b_width; mb_x++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3042 encode_q_branch(s, 0, mb_x, mb_y); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3043 s->c = r; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3044 memcpy(s->block_state, state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3045 } |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3046 |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
3047 for(pass=0; pass<25; pass++){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3048 int change= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3049 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3050 for(mb_y= 0; mb_y<b_height; mb_y++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3051 for(mb_x= 0; mb_x<b_width; mb_x++){ |
3314 | 3052 int dia_change, i, j, ref; |
3053 int best_rd= INT_MAX, ref_rd; | |
3054 BlockNode backup, ref_b; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3055 const int index= mb_x + mb_y * b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3056 BlockNode *block= &s->block[index]; |
3324 | 3057 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; |
3058 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; | |
3059 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; | |
3060 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; | |
3061 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; | |
3062 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; | |
3063 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; | |
3064 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3065 const int b_w= (MB_SIZE >> s->block_max_depth); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3066 uint8_t obmc_edged[b_w*2][b_w*2]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3067 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3068 if(pass && (block->type & BLOCK_OPT)) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3069 continue; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3070 block->type |= BLOCK_OPT; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3071 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3072 backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3073 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3074 if(!s->me_cache_generation) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3075 memset(s->me_cache, 0, sizeof(s->me_cache)); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3076 s->me_cache_generation += 1<<22; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3077 |
6412 | 3078 //FIXME precalculate |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3079 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3080 int x, y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3081 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3082 if(mb_x==0) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3083 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3084 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3085 if(mb_x==b_stride-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3086 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3087 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3088 if(mb_y==0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3089 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3090 obmc_edged[0][x] += obmc_edged[b_w-1][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3091 for(y=1; y<b_w; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3092 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3093 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3094 if(mb_y==b_height-1){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3095 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3096 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3097 for(y=b_w; y<b_w*2-1; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3098 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3099 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3100 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3101 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3102 //skip stuff outside the picture |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
3103 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3104 uint8_t *src= s-> input_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3105 uint8_t *dst= s->current_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3106 const int stride= s->current_picture.linesize[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3107 const int block_w= MB_SIZE >> s->block_max_depth; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3108 const int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3109 const int sy= block_w*mb_y - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3110 const int w= s->plane[0].width; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3111 const int h= s->plane[0].height; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3112 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3113 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3114 for(y=sy; y<0; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3116 for(y=h; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3117 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3118 if(sx<0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3119 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3120 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3121 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3122 if(sx+block_w*2 > w){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3123 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3124 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3125 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3126 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3127 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3128 // intra(black) = neighbors' contribution to the current block |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3129 for(i=0; i<3; i++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3130 color[i]= get_dc(s, mb_x, mb_y, i); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3131 |
5127 | 3132 // get previous score (cannot be cached due to OBMC) |
3057 | 3133 if(pass > 0 && (block->type&BLOCK_INTRA)){ |
3134 int color0[3]= {block->color[0], block->color[1], block->color[2]}; | |
3135 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); | |
3136 }else | |
3197 | 3137 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); |
3138 | |
3314 | 3139 ref_b= *block; |
3140 ref_rd= best_rd; | |
3141 for(ref=0; ref < s->ref_frames; ref++){ | |
3142 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; | |
3143 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold | |
3144 continue; | |
3145 block->ref= ref; | |
3146 best_rd= INT_MAX; | |
3147 | |
3148 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); | |
3149 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); | |
3324 | 3150 if(tb) |
3314 | 3151 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); |
3324 | 3152 if(lb) |
3314 | 3153 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); |
3324 | 3154 if(rb) |
3314 | 3155 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); |
3324 | 3156 if(bb) |
3314 | 3157 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); |
3158 | |
3159 /* fullpel ME */ | |
6412 | 3160 //FIXME avoid subpel interpolation / round to nearest integer |
3314 | 3161 do{ |
3162 dia_change=0; | |
3163 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3164 for(j=0; j<i; j++){ | |
3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3167 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3168 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3169 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3170 } |
3314 | 3171 }while(dia_change); |
3172 /* subpel ME */ | |
3173 do{ | |
3174 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3175 dia_change=0; | |
3176 for(i=0; i<8; i++) | |
3177 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); | |
3178 }while(dia_change); | |
3179 //FIXME or try the standard 2 pass qpel or similar | |
3180 | |
3181 mvr[0][0]= block->mx; | |
3182 mvr[0][1]= block->my; | |
3183 if(ref_rd > best_rd){ | |
3184 ref_rd= best_rd; | |
3185 ref_b= *block; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3186 } |
3314 | 3187 } |
3188 best_rd= ref_rd; | |
3189 *block= ref_b; | |
2998 | 3190 #if 1 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3191 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3192 //FIXME RD style color selection |
2998 | 3193 #endif |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3194 if(!same_block(block, &backup)){ |
3324 | 3195 if(tb ) tb ->type &= ~BLOCK_OPT; |
3196 if(lb ) lb ->type &= ~BLOCK_OPT; | |
3197 if(rb ) rb ->type &= ~BLOCK_OPT; | |
3198 if(bb ) bb ->type &= ~BLOCK_OPT; | |
3199 if(tlb) tlb->type &= ~BLOCK_OPT; | |
3200 if(trb) trb->type &= ~BLOCK_OPT; | |
3201 if(blb) blb->type &= ~BLOCK_OPT; | |
3202 if(brb) brb->type &= ~BLOCK_OPT; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3203 change ++; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3204 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3205 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3206 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3207 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3208 if(!change) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3209 break; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3210 } |
3051 | 3211 |
3212 if(s->block_max_depth == 1){ | |
3213 int change= 0; | |
3214 for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
3215 for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
3324 | 3216 int i; |
3051 | 3217 int best_rd, init_rd; |
3218 const int index= mb_x + mb_y * b_stride; | |
3219 BlockNode *b[4]; | |
3220 | |
3221 b[0]= &s->block[index]; | |
3222 b[1]= b[0]+1; | |
3223 b[2]= b[0]+b_stride; | |
3224 b[3]= b[2]+1; | |
3225 if(same_block(b[0], b[1]) && | |
3226 same_block(b[0], b[2]) && | |
3227 same_block(b[0], b[3])) | |
3228 continue; | |
3229 | |
3230 if(!s->me_cache_generation) | |
3231 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3232 s->me_cache_generation += 1<<22; | |
3233 | |
3234 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3235 | |
3314 | 3236 //FIXME more multiref search? |
3051 | 3237 check_4block_inter(s, mb_x, mb_y, |
3238 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
3314 | 3239 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); |
3051 | 3240 |
3241 for(i=0; i<4; i++) | |
3242 if(!(b[i]->type&BLOCK_INTRA)) | |
3314 | 3243 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); |
3051 | 3244 |
3245 if(init_rd != best_rd) | |
3246 change++; | |
3247 } | |
3248 } | |
3249 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
3250 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3251 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3252 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3253 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
2138 | 3254 const int w= b->width; |
3255 const int h= b->height; | |
4594 | 3256 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
5575 | 3257 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
2150 | 3258 int x,y, thres1, thres2; |
2138 | 3259 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3260 if(s->qlog == LOSSLESS_QLOG){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3261 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3262 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3263 dst[x + y*stride]= src[x + y*stride]; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3264 return; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3265 } |
2967 | 3266 |
2138 | 3267 bias= bias ? 0 : (3*qmul)>>3; |
2150 | 3268 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3269 thres2= 2*thres1; | |
2967 | 3270 |
2138 | 3271 if(!bias){ |
3272 for(y=0; y<h; y++){ | |
3273 for(x=0; x<w; x++){ | |
2150 | 3274 int i= src[x + y*stride]; |
2967 | 3275 |
2150 | 3276 if((unsigned)(i+thres1) > thres2){ |
3277 if(i>=0){ | |
3278 i<<= QEXPSHIFT; | |
3279 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3280 dst[x + y*stride]= i; |
2150 | 3281 }else{ |
3282 i= -i; | |
3283 i<<= QEXPSHIFT; | |
3284 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3285 dst[x + y*stride]= -i; |
2150 | 3286 } |
3287 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3288 dst[x + y*stride]= 0; |
2138 | 3289 } |
3290 } | |
3291 }else{ | |
3292 for(y=0; y<h; y++){ | |
3293 for(x=0; x<w; x++){ | |
2967 | 3294 int i= src[x + y*stride]; |
3295 | |
2150 | 3296 if((unsigned)(i+thres1) > thres2){ |
3297 if(i>=0){ | |
3298 i<<= QEXPSHIFT; | |
3299 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3300 dst[x + y*stride]= i; |
2150 | 3301 }else{ |
3302 i= -i; | |
3303 i<<= QEXPSHIFT; | |
3304 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3305 dst[x + y*stride]= -i; |
2150 | 3306 } |
3307 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3308 dst[x + y*stride]= 0; |
2138 | 3309 } |
3310 } | |
3311 } | |
3312 } | |
3313 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3314 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3315 const int w= b->width; |
4594 | 3316 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3317 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3318 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3319 int x,y; |
2967 | 3320 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3321 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3322 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3323 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3324 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3325 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3326 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3327 int i= line[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3328 if(i<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3329 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3330 }else if(i>0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3331 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3332 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3333 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3334 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3335 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3336 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3337 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
2138 | 3338 const int w= b->width; |
3339 const int h= b->height; | |
4594 | 3340 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3341 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2138 | 3342 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3343 int x,y; | |
2967 | 3344 |
2161 | 3345 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3346 |
2138 | 3347 for(y=0; y<h; y++){ |
3348 for(x=0; x<w; x++){ | |
3349 int i= src[x + y*stride]; | |
3350 if(i<0){ | |
3351 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
3352 }else if(i>0){ | |
3353 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
3354 } | |
3355 } | |
3356 } | |
3357 } | |
3358 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3359 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3360 const int w= b->width; |
3361 const int h= b->height; | |
3362 int x,y; | |
2967 | 3363 |
2138 | 3364 for(y=h-1; y>=0; y--){ |
3365 for(x=w-1; x>=0; x--){ | |
3366 int i= x + y*stride; | |
2967 | 3367 |
2138 | 3368 if(x){ |
3369 if(use_median){ | |
3370 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3371 else src[i] -= src[i - 1]; | |
3372 }else{ | |
3373 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3374 else src[i] -= src[i - 1]; | |
3375 } | |
3376 }else{ | |
3377 if(y) src[i] -= src[i - stride]; | |
3378 } | |
3379 } | |
3380 } | |
3381 } | |
3382 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3383 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3384 const int w= b->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3385 int x,y; |
2967 | 3386 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3387 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3388 IDWTELEM * prev; |
2967 | 3389 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3390 if (start_y != 0) |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3391 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2967 | 3392 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3393 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3394 prev = line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3395 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3396 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3397 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3398 if(x){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3399 if(use_median){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3400 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3401 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3402 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3403 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3404 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3405 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3406 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3407 if(y) line[x] += prev[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3408 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3409 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3410 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3411 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3412 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3413 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3414 const int w= b->width; |
3415 const int h= b->height; | |
3416 int x,y; | |
2967 | 3417 |
2138 | 3418 for(y=0; y<h; y++){ |
3419 for(x=0; x<w; x++){ | |
3420 int i= x + y*stride; | |
2967 | 3421 |
2138 | 3422 if(x){ |
3423 if(use_median){ | |
3424 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3425 else src[i] += src[i - 1]; | |
3426 }else{ | |
3427 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3428 else src[i] += src[i - 1]; | |
3429 } | |
3430 }else{ | |
3431 if(y) src[i] += src[i - stride]; | |
3432 } | |
3433 } | |
3434 } | |
3435 } | |
3436 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3437 static void encode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3438 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3439 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3440 for(plane_index=0; plane_index<2; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3441 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3442 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3443 if(orientation==2) continue; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3444 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3445 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3446 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3447 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3448 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3449 |
2138 | 3450 static void encode_header(SnowContext *s){ |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3451 int plane_index, i; |
2967 | 3452 uint8_t kstate[32]; |
3453 | |
3454 memset(kstate, MID_STATE, sizeof(kstate)); | |
2138 | 3455 |
2335 | 3456 put_rac(&s->c, kstate, s->keyframe); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3457 if(s->keyframe || s->always_reset){ |
2199 | 3458 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3459 s->last_spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3460 s->last_qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3461 s->last_qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3462 s->last_mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3463 s->last_block_max_depth= 0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3464 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3465 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3466 p->last_htaps=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3467 p->last_diag_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3468 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3469 } |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3470 } |
2138 | 3471 if(s->keyframe){ |
3472 put_symbol(&s->c, s->header_state, s->version, 0); | |
2335 | 3473 put_rac(&s->c, s->header_state, s->always_reset); |
2138 | 3474 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
3475 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
3476 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
3477 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
3478 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); | |
3479 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
2335 | 3480 put_rac(&s->c, s->header_state, s->spatial_scalability); |
3481 // put_rac(&s->c, s->header_state, s->rate_scalability); | |
3314 | 3482 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); |
2138 | 3483 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3484 encode_qlogs(s); |
2138 | 3485 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3486 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3487 if(!s->keyframe){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3488 int update_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3489 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3490 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3491 update_mc |= p->last_htaps != p->htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3492 update_mc |= p->last_diag_mc != p->diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3493 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3494 } |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3495 put_rac(&s->c, s->header_state, update_mc); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3496 if(update_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3497 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3498 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3499 put_rac(&s->c, s->header_state, p->diag_mc); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3500 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3501 for(i= p->htaps/2; i; i--) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3502 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3503 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3504 } |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3505 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3506 put_rac(&s->c, s->header_state, 1); |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3507 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3508 encode_qlogs(s); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3509 }else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3510 put_rac(&s->c, s->header_state, 0); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3511 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3512 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3513 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3514 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3515 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3516 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3517 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3518 |
6144 | 3519 } |
3520 | |
3521 static void update_last_header_values(SnowContext *s){ | |
3522 int plane_index; | |
3523 | |
3524 if(!s->keyframe){ | |
3525 for(plane_index=0; plane_index<2; plane_index++){ | |
3526 Plane *p= &s->plane[plane_index]; | |
3527 p->last_diag_mc= p->diag_mc; | |
3528 p->last_htaps = p->htaps; | |
3529 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); | |
3530 } | |
3531 } | |
3532 | |
6414 | 3533 s->last_spatial_decomposition_type = s->spatial_decomposition_type; |
3534 s->last_qlog = s->qlog; | |
3535 s->last_qbias = s->qbias; | |
3536 s->last_mv_scale = s->mv_scale; | |
3537 s->last_block_max_depth = s->block_max_depth; | |
3538 s->last_spatial_decomposition_count = s->spatial_decomposition_count; | |
2138 | 3539 } |
3540 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3541 static void decode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3542 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3543 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3544 for(plane_index=0; plane_index<3; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3545 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3546 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3547 int q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3548 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3549 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3550 else q= get_symbol(&s->c, s->header_state, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3551 s->plane[plane_index].band[level][orientation].qlog= q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3552 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3553 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3554 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3555 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3556 |
9367 | 3557 #define GET_S(dst, check) \ |
3558 tmp= get_symbol(&s->c, s->header_state, 0);\ | |
3559 if(!(check)){\ | |
3560 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\ | |
3561 return -1;\ | |
3562 }\ | |
3563 dst= tmp; | |
3564 | |
2138 | 3565 static int decode_header(SnowContext *s){ |
9365 | 3566 int plane_index, tmp; |
2335 | 3567 uint8_t kstate[32]; |
2138 | 3568 |
2967 | 3569 memset(kstate, MID_STATE, sizeof(kstate)); |
2335 | 3570 |
3571 s->keyframe= get_rac(&s->c, kstate); | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3572 if(s->keyframe || s->always_reset){ |
2199 | 3573 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3574 s->spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3575 s->qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3576 s->qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3577 s->mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3578 s->block_max_depth= 0; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3579 } |
2138 | 3580 if(s->keyframe){ |
9367 | 3581 GET_S(s->version, tmp <= 0U) |
2335 | 3582 s->always_reset= get_rac(&s->c, s->header_state); |
2138 | 3583 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
3584 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
9371 | 3585 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) |
2138 | 3586 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); |
3587 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
3588 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
2335 | 3589 s->spatial_scalability= get_rac(&s->c, s->header_state); |
3590 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
9367 | 3591 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES) |
3592 s->max_ref_frames++; | |
2138 | 3593 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3594 decode_qlogs(s); |
2138 | 3595 } |
2967 | 3596 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3597 if(!s->keyframe){ |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3598 if(get_rac(&s->c, s->header_state)){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3599 for(plane_index=0; plane_index<2; plane_index++){ |
5807 | 3600 int htaps, i, sum=0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3601 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3602 p->diag_mc= get_rac(&s->c, s->header_state); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3603 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
3604 if((unsigned)htaps > HTAPS_MAX || htaps==0) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3605 return -1; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3606 p->htaps= htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3607 for(i= htaps/2; i; i--){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3608 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3609 sum += p->hcoeff[i]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3610 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3611 p->hcoeff[0]= 32-sum; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3612 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3613 s->plane[2].diag_mc= s->plane[1].diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3614 s->plane[2].htaps = s->plane[1].htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3615 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3616 } |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3617 if(get_rac(&s->c, s->header_state)){ |
9371 | 3618 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3619 decode_qlogs(s); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3620 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3621 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3622 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3623 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); |
9366 | 3624 if(s->spatial_decomposition_type > 1U){ |
2138 | 3625 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); |
3626 return -1; | |
3627 } | |
9373
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3628 if(FFMIN(s->avctx-> width>>s->chroma_h_shift, |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3629 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){ |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3630 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count); |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3631 return -1; |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3632 } |
2967 | 3633 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3634 s->qlog += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3635 s->mv_scale += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3636 s->qbias += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3637 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); |
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3206
diff
changeset
|
3638 if(s->block_max_depth > 1 || s->block_max_depth < 0){ |
2952 | 3639 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); |
3640 s->block_max_depth= 0; | |
3641 return -1; | |
3642 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3643 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3644 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3645 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3646 |
3075 | 3647 static void init_qexp(void){ |
2600 | 3648 int i; |
3649 double v=128; | |
3650 | |
3651 for(i=0; i<QROOT; i++){ | |
3652 qexp[i]= lrintf(v); | |
2967 | 3653 v *= pow(2, 1.0 / QROOT); |
2600 | 3654 } |
3655 } | |
3656 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
3657 static av_cold int common_init(AVCodecContext *avctx){ |
2138 | 3658 SnowContext *s = avctx->priv_data; |
3659 int width, height; | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3660 int i, j; |
2138 | 3661 |
3662 s->avctx= avctx; | |
9365 | 3663 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe |
2967 | 3664 |
2138 | 3665 dsputil_init(&s->dsp, avctx); |
3666 | |
3667 #define mcf(dx,dy)\ | |
3668 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3669 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3670 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3671 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3672 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3673 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
2138 | 3674 |
3675 mcf( 0, 0) | |
3676 mcf( 4, 0) | |
3677 mcf( 8, 0) | |
3678 mcf(12, 0) | |
3679 mcf( 0, 4) | |
3680 mcf( 4, 4) | |
3681 mcf( 8, 4) | |
3682 mcf(12, 4) | |
3683 mcf( 0, 8) | |
3684 mcf( 4, 8) | |
3685 mcf( 8, 8) | |
3686 mcf(12, 8) | |
3687 mcf( 0,12) | |
3688 mcf( 4,12) | |
3689 mcf( 8,12) | |
3690 mcf(12,12) | |
3691 | |
3692 #define mcfh(dx,dy)\ | |
3693 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3694 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3695 mc_block_hpel ## dx ## dy ## 16;\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3696 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3697 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3698 mc_block_hpel ## dx ## dy ## 8; |
2138 | 3699 |
3700 mcfh(0, 0) | |
3701 mcfh(8, 0) | |
3702 mcfh(0, 8) | |
3703 mcfh(8, 8) | |
2600 | 3704 |
3705 if(!qexp[0]) | |
3706 init_qexp(); | |
3707 | |
2138 | 3708 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
2967 | 3709 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3710 width= s->avctx->width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3711 height= s->avctx->height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3712 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3713 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
6412 | 3714 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here |
2967 | 3715 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3716 for(i=0; i<MAX_REF_FRAMES; i++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3717 for(j=0; j<MAX_REF_FRAMES; j++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3718 scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3719 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3720 s->avctx->get_buffer(s->avctx, &s->mconly_picture); |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
3721 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE); |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3722 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3723 return 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3724 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3725 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3726 static int common_init_after_header(AVCodecContext *avctx){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3727 SnowContext *s = avctx->priv_data; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3728 int plane_index, level, orientation; |
2967 | 3729 |
3730 for(plane_index=0; plane_index<3; plane_index++){ | |
2138 | 3731 int w= s->avctx->width; |
3732 int h= s->avctx->height; | |
3733 | |
3734 if(plane_index){ | |
3735 w>>= s->chroma_h_shift; | |
3736 h>>= s->chroma_v_shift; | |
3737 } | |
3738 s->plane[plane_index].width = w; | |
3739 s->plane[plane_index].height= h; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3740 |
2138 | 3741 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
3742 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3743 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 3744 |
2138 | 3745 b->buf= s->spatial_dwt_buffer; |
3746 b->level= level; | |
3747 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3748 b->width = (w + !(orientation&1))>>1; | |
3749 b->height= (h + !(orientation>1))>>1; | |
2967 | 3750 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3751 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3752 b->buf_x_offset = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3753 b->buf_y_offset = 0; |
2967 | 3754 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3755 if(orientation&1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3756 b->buf += (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3757 b->buf_x_offset = (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3758 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3759 if(orientation>1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3760 b->buf += b->stride>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3761 b->buf_y_offset = b->stride_line >> 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3762 } |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3763 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
2967 | 3764 |
2138 | 3765 if(level) |
3766 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3767 //FIXME avoid this realloc |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3768 av_freep(&b->x_coeff); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3769 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
2138 | 3770 } |
3771 w= (w+1)>>1; | |
3772 h= (h+1)>>1; | |
3773 } | |
3774 } | |
2967 | 3775 |
2138 | 3776 return 0; |
3777 } | |
3778 | |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3779 static int qscale2qlog(int qscale){ |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3780 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3781 + 61*QROOT/8; //<64 >60 |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3782 } |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3783 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3784 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) |
3313 | 3785 { |
6412 | 3786 /* Estimate the frame's complexity as a sum of weighted dwt coefficients. |
3313 | 3787 * FIXME we know exact mv bits at this point, |
3788 * but ratecontrol isn't set up to include them. */ | |
3789 uint32_t coef_sum= 0; | |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3790 int level, orientation, delta_qlog; |
3313 | 3791 |
3792 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3793 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3794 SubBand *b= &s->plane[0].band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3795 IDWTELEM *buf= b->ibuf; |
3313 | 3796 const int w= b->width; |
3797 const int h= b->height; | |
3798 const int stride= b->stride; | |
4594 | 3799 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
3313 | 3800 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3801 const int qdiv= (1<<16)/qmul; | |
3802 int x, y; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3803 //FIXME this is ugly |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3804 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3805 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3806 buf[x+y*stride]= b->buf[x+y*stride]; |
3313 | 3807 if(orientation==0) |
3808 decorrelate(s, b, buf, stride, 1, 0); | |
3809 for(y=0; y<h; y++) | |
3810 for(x=0; x<w; x++) | |
3811 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | |
3812 } | |
3813 } | |
3814 | |
3815 /* ugly, ratecontrol just takes a sqrt again */ | |
3816 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | |
3817 assert(coef_sum < INT_MAX); | |
3818 | |
6481 | 3819 if(pict->pict_type == FF_I_TYPE){ |
3313 | 3820 s->m.current_picture.mb_var_sum= coef_sum; |
3821 s->m.current_picture.mc_mb_var_sum= 0; | |
3822 }else{ | |
3823 s->m.current_picture.mc_mb_var_sum= coef_sum; | |
3824 s->m.current_picture.mb_var_sum= 0; | |
3825 } | |
3826 | |
3827 pict->quality= ff_rate_estimate_qscale(&s->m, 1); | |
3766 | 3828 if (pict->quality < 0) |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
3829 return INT_MIN; |
3313 | 3830 s->lambda= pict->quality * 3/2; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3831 delta_qlog= qscale2qlog(pict->quality) - s->qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3832 s->qlog+= delta_qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3833 return delta_qlog; |
3313 | 3834 } |
2138 | 3835 |
5909 | 3836 static void calculate_visual_weight(SnowContext *s, Plane *p){ |
2138 | 3837 int width = p->width; |
3838 int height= p->height; | |
2198 | 3839 int level, orientation, x, y; |
2138 | 3840 |
3841 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3842 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3843 SubBand *b= &p->band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3844 IDWTELEM *ibuf= b->ibuf; |
2138 | 3845 int64_t error=0; |
2967 | 3846 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3847 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3848 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3849 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2138 | 3850 for(y=0; y<height; y++){ |
3851 for(x=0; x<width; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3852 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
2138 | 3853 error += d*d; |
3854 } | |
3855 } | |
3856 | |
3857 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
3858 } | |
3859 } | |
3860 } | |
3861 | |
5702 | 3862 #define QUANTIZE2 0 |
3863 | |
3864 #if QUANTIZE2==1 | |
3865 #define Q2_STEP 8 | |
3866 | |
3867 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
3868 SubBand *b= &p->band[level][orientation]; | |
3869 int x, y; | |
3870 int xo=0; | |
3871 int yo=0; | |
3872 int step= 1 << (s->spatial_decomposition_count - level); | |
3873 | |
3874 if(orientation&1) | |
3875 xo= step>>1; | |
3876 if(orientation&2) | |
3877 yo= step>>1; | |
3878 | |
6412 | 3879 //FIXME bias for nonzero ? |
5702 | 3880 //FIXME optimize |
3881 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
3882 for(y=0; y<p->height; y++){ | |
3883 for(x=0; x<p->width; x++){ | |
3884 int sx= (x-xo + step/2) / step / Q2_STEP; | |
3885 int sy= (y-yo + step/2) / step / Q2_STEP; | |
3886 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
3887 assert(sx>=0 && sy>=0 && sx < score_stride); | |
3888 v= ((v+8)>>4)<<4; | |
3889 score[sx + sy*score_stride] += v*v; | |
3890 assert(score[sx + sy*score_stride] >= 0); | |
3891 } | |
3892 } | |
3893 } | |
3894 | |
3895 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
3896 int level, orientation; | |
3897 | |
3898 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3899 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3900 SubBand *b= &p->band[level][orientation]; | |
3901 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3902 | |
3903 dequantize(s, b, dst, b->stride); | |
3904 } | |
3905 } | |
3906 } | |
3907 | |
3908 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
3909 int level, orientation, ys, xs, x, y, pass; | |
3910 IDWTELEM best_dequant[height * stride]; | |
3911 IDWTELEM idwt2_buffer[height * stride]; | |
3912 const int score_stride= (width + 10)/Q2_STEP; | |
3913 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3914 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3915 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
3916 | |
3917 //FIXME pass the copy cleanly ? | |
3918 | |
3919 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
3920 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3921 | |
3922 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3923 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3924 SubBand *b= &p->band[level][orientation]; | |
3925 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3926 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
6412 | 3927 assert(src == b->buf); // code does not depend on this but it is true currently |
5702 | 3928 |
3929 quantize(s, b, dst, src, b->stride, s->qbias); | |
3930 } | |
3931 } | |
3932 for(pass=0; pass<1; pass++){ | |
3933 if(s->qbias == 0) //keyframe | |
3934 continue; | |
3935 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3936 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3937 SubBand *b= &p->band[level][orientation]; | |
3938 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3939 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3940 | |
3941 for(ys= 0; ys<Q2_STEP; ys++){ | |
3942 for(xs= 0; xs<Q2_STEP; xs++){ | |
3943 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3944 dequantize_all(s, p, idwt2_buffer, width, height); | |
3945 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3946 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3947 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3948 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3949 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3950 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
3951 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
6412 | 3952 //FIXME try more than just -- |
5702 | 3953 } |
3954 } | |
3955 dequantize_all(s, p, idwt2_buffer, width, height); | |
3956 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3957 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3958 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3959 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3960 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
3961 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
3962 best_score[score_idx]= score[score_idx]; | |
3963 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
3964 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
3965 //FIXME copy instead | |
3966 } | |
3967 } | |
3968 } | |
3969 } | |
3970 } | |
3971 } | |
3972 } | |
3973 } | |
6412 | 3974 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end |
5702 | 3975 } |
3976 | |
5910 | 3977 #endif /* QUANTIZE2==1 */ |
5702 | 3978 |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
3979 static av_cold int encode_init(AVCodecContext *avctx) |
2138 | 3980 { |
3981 SnowContext *s = avctx->priv_data; | |
2198 | 3982 int plane_index; |
2138 | 3983 |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
3984 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ |
6412 | 3985 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" |
3986 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); | |
2151 | 3987 return -1; |
3988 } | |
2967 | 3989 |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3990 if(avctx->prediction_method == DWT_97 |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3991 && (avctx->flags & CODEC_FLAG_QSCALE) |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3992 && avctx->global_quality == 0){ |
6412 | 3993 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3994 return -1; |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3995 } |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3996 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3997 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3998 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3999 s->chroma_h_shift= 1; //FIXME XXX |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4000 s->chroma_v_shift= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4001 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4002 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4003 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4004 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4005 for(plane_index=0; plane_index<3; plane_index++){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4006 s->plane[plane_index].diag_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4007 s->plane[plane_index].htaps= 6; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4008 s->plane[plane_index].hcoeff[0]= 40; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4009 s->plane[plane_index].hcoeff[1]= -10; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4010 s->plane[plane_index].hcoeff[2]= 2; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4011 s->plane[plane_index].fast_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4012 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4013 |
2138 | 4014 common_init(avctx); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4015 alloc_blocks(s); |
2967 | 4016 |
2138 | 4017 s->version=0; |
2967 | 4018 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4019 s->m.avctx = avctx; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4020 s->m.flags = avctx->flags; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4021 s->m.bit_rate= avctx->bit_rate; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4022 |
8239
dcfdb3352dde
Fix regression test failure with pthreads on multiprocessor systems.
michael
parents:
8214
diff
changeset
|
4023 s->m.me.temp = |
2138 | 4024 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
4025 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
4026 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4027 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); |
2138 | 4028 h263_encode_init(&s->m); //mv_penalty |
4029 | |
3314 | 4030 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); |
4031 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4032 if(avctx->flags&CODEC_FLAG_PASS1){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4033 if(!avctx->stats_out) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4034 avctx->stats_out = av_mallocz(256); |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4035 } |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4036 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4037 if(ff_rate_control_init(&s->m) < 0) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4038 return -1; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4039 } |
3313 | 4040 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4041 |
2138 | 4042 avctx->coded_frame= &s->current_picture; |
4043 switch(avctx->pix_fmt){ | |
4044 // case PIX_FMT_YUV444P: | |
4045 // case PIX_FMT_YUV422P: | |
4046 case PIX_FMT_YUV420P: | |
4047 case PIX_FMT_GRAY8: | |
4048 // case PIX_FMT_YUV411P: | |
4049 // case PIX_FMT_YUV410P: | |
4050 s->colorspace_type= 0; | |
4051 break; | |
4494
ce643a22f049
Replace deprecated PIX_FMT names by the newer variants.
diego
parents:
4436
diff
changeset
|
4052 /* case PIX_FMT_RGB32: |
2138 | 4053 s->colorspace= 1; |
4054 break;*/ | |
4055 default: | |
5908 | 4056 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); |
2138 | 4057 return -1; |
4058 } | |
4059 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
4060 s->chroma_h_shift= 1; | |
4061 s->chroma_v_shift= 1; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4062 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4063 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4064 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4065 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4066 s->avctx->get_buffer(s->avctx, &s->input_picture); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4067 |
3314 | 4068 if(s->avctx->me_method == ME_ITER){ |
4069 int i; | |
4070 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
4071 for(i=0; i<s->max_ref_frames; i++){ | |
4072 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
4073 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
4074 } | |
4075 } | |
4076 | |
2138 | 4077 return 0; |
4078 } | |
4079 | |
5924 | 4080 #define USE_HALFPEL_PLANE 0 |
4081 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4082 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4083 int p,x,y; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4084 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4085 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4086 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4087 for(p=0; p<3; p++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4088 int is_chroma= !!p; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4089 int w= s->avctx->width >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4090 int h= s->avctx->height >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4091 int ls= frame->linesize[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4092 uint8_t *src= frame->data[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4093 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4094 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4095 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4096 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4097 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4098 halfpel[0][p]= src; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4099 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4100 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4101 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4102 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4103 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4104 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4105 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4106 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4107 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4108 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4109 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4110 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4111 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4112 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4113 src= halfpel[1][p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4114 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4115 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4116 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4117 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4118 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4119 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4120 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4121 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4122 //FIXME border! |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4123 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4124 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4125 |
9375
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4126 static void release_buffer(AVCodecContext *avctx){ |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4127 SnowContext *s = avctx->priv_data; |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4128 int i; |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4129 |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4130 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4131 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4132 for(i=0; i<9; i++) |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4133 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4134 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4135 } |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4136 } |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4137 |
2138 | 4138 static int frame_start(SnowContext *s){ |
4139 AVFrame tmp; | |
2187 | 4140 int w= s->avctx->width; //FIXME round up to x16 ? |
4141 int h= s->avctx->height; | |
2138 | 4142 |
2187 | 4143 if(s->current_picture.data[0]){ |
6437 | 4144 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); |
4145 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
4146 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
2187 | 4147 } |
4148 | |
9376
e376facae584
Make sure the next used frame is released so get_buffer() wont fail.
michael
parents:
9375
diff
changeset
|
4149 release_buffer(s->avctx); |
e376facae584
Make sure the next used frame is released so get_buffer() wont fail.
michael
parents:
9375
diff
changeset
|
4150 |
3314 | 4151 tmp= s->last_picture[s->max_ref_frames-1]; |
4152 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4153 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); |
5924 | 4154 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4155 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); |
3314 | 4156 s->last_picture[0]= s->current_picture; |
2138 | 4157 s->current_picture= tmp; |
2967 | 4158 |
3314 | 4159 if(s->keyframe){ |
4160 s->ref_frames= 0; | |
4161 }else{ | |
4162 int i; | |
4163 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
4164 if(i && s->last_picture[i-1].key_frame) | |
4165 break; | |
4166 s->ref_frames= i; | |
9372
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4167 if(s->ref_frames==0){ |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4168 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n"); |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4169 return -1; |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4170 } |
3314 | 4171 } |
4172 | |
2138 | 4173 s->current_picture.reference= 1; |
4174 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
4175 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
4176 return -1; | |
4177 } | |
2967 | 4178 |
3314 | 4179 s->current_picture.key_frame= s->keyframe; |
4180 | |
2138 | 4181 return 0; |
4182 } | |
4183 | |
4184 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
4185 SnowContext *s = avctx->priv_data; | |
2335 | 4186 RangeCoder * const c= &s->c; |
2138 | 4187 AVFrame *pict = data; |
4188 const int width= s->avctx->width; | |
4189 const int height= s->avctx->height; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4190 int level, orientation, plane_index, i, y; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4191 uint8_t rc_header_bak[sizeof(s->header_state)]; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4192 uint8_t rc_block_bak[sizeof(s->block_state)]; |
2138 | 4193 |
2335 | 4194 ff_init_range_encoder(c, buf, buf_size); |
4195 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2967 | 4196 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4197 for(i=0; i<3; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4198 int shift= !!i; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4199 for(y=0; y<(height>>shift); y++) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4200 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4201 &pict->data[i][y * pict->linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4202 width>>shift); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4203 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4204 s->new_picture = *pict; |
2138 | 4205 |
3313 | 4206 s->m.picture_number= avctx->frame_number; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4207 if(avctx->flags&CODEC_FLAG_PASS2){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4208 s->m.pict_type = |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4209 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4210 s->keyframe= pict->pict_type==FF_I_TYPE; |
3766 | 4211 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { |
3193 | 4212 pict->quality= ff_rate_estimate_qscale(&s->m, 0); |
3766 | 4213 if (pict->quality < 0) |
4214 return -1; | |
4215 } | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4216 }else{ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4217 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
3313 | 4218 s->m.pict_type= |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4219 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4220 } |
2967 | 4221 |
3313 | 4222 if(s->pass1_rc && avctx->frame_number == 0) |
4223 pict->quality= 2*FF_QP2LAMBDA; | |
2161 | 4224 if(pict->quality){ |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4225 s->qlog= qscale2qlog(pict->quality); |
3313 | 4226 s->lambda = pict->quality * 3/2; |
4227 } | |
4228 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ | |
2161 | 4229 s->qlog= LOSSLESS_QLOG; |
3313 | 4230 s->lambda = 0; |
6412 | 4231 }//else keep previous frame's qlog until after motion estimation |
2138 | 4232 |
4233 frame_start(s); | |
4234 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4235 s->m.current_picture_ptr= &s->m.current_picture; |
6481 | 4236 if(pict->pict_type == FF_P_TYPE){ |
2138 | 4237 int block_width = (width +15)>>4; |
4238 int block_height= (height+15)>>4; | |
4239 int stride= s->current_picture.linesize[0]; | |
2967 | 4240 |
2138 | 4241 assert(s->current_picture.data[0]); |
3314 | 4242 assert(s->last_picture[0].data[0]); |
2967 | 4243 |
2138 | 4244 s->m.avctx= s->avctx; |
4245 s->m.current_picture.data[0]= s->current_picture.data[0]; | |
3314 | 4246 s->m. last_picture.data[0]= s->last_picture[0].data[0]; |
2138 | 4247 s->m. new_picture.data[0]= s-> input_picture.data[0]; |
4248 s->m. last_picture_ptr= &s->m. last_picture; | |
4249 s->m.linesize= | |
4250 s->m. last_picture.linesize[0]= | |
4251 s->m. new_picture.linesize[0]= | |
4252 s->m.current_picture.linesize[0]= stride; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4253 s->m.uvlinesize= s->current_picture.linesize[1]; |
2138 | 4254 s->m.width = width; |
4255 s->m.height= height; | |
4256 s->m.mb_width = block_width; | |
4257 s->m.mb_height= block_height; | |
4258 s->m.mb_stride= s->m.mb_width+1; | |
4259 s->m.b8_stride= 2*s->m.mb_width+1; | |
4260 s->m.f_code=1; | |
4261 s->m.pict_type= pict->pict_type; | |
4262 s->m.me_method= s->avctx->me_method; | |
4263 s->m.me.scene_change_score=0; | |
4264 s->m.flags= s->avctx->flags; | |
4265 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
4266 s->m.out_format= FMT_H263; | |
4267 s->m.unrestricted_mv= 1; | |
4268 | |
3313 | 4269 s->m.lambda = s->lambda; |
2138 | 4270 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4271 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4272 |
2138 | 4273 s->m.dsp= s->dsp; //move |
4274 ff_init_me(&s->m); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4275 s->dsp= s->m.dsp; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4276 } |
2967 | 4277 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4278 if(s->pass1_rc){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4279 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4280 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4281 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4282 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4283 redo_frame: |
2967 | 4284 |
6481 | 4285 if(pict->pict_type == FF_I_TYPE) |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4286 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4287 else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4288 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4289 |
3313 | 4290 s->m.pict_type = pict->pict_type; |
6481 | 4291 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0; |
2138 | 4292 |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4293 common_init_after_header(avctx); |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4294 |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4295 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4296 for(plane_index=0; plane_index<3; plane_index++){ |
5909 | 4297 calculate_visual_weight(s, &s->plane[plane_index]); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4298 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4299 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4300 |
2138 | 4301 encode_header(s); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4302 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4303 encode_blocks(s, 1); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4304 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; |
2967 | 4305 |
2138 | 4306 for(plane_index=0; plane_index<3; plane_index++){ |
4307 Plane *p= &s->plane[plane_index]; | |
4308 int w= p->width; | |
4309 int h= p->height; | |
4310 int x, y; | |
2198 | 4311 // int bits= put_bits_count(&s->c.pb); |
2138 | 4312 |
6414 | 4313 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
4314 //FIXME optimize | |
4315 if(pict->data[plane_index]) //FIXME gray hack | |
4316 for(y=0; y<h; y++){ | |
4317 for(x=0; x<w; x++){ | |
4318 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; | |
4319 } | |
4320 } | |
4321 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); | |
4322 | |
4323 if( plane_index==0 | |
6481 | 4324 && pict->pict_type == FF_P_TYPE |
6414 | 4325 && !(avctx->flags&CODEC_FLAG_PASS2) |
4326 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ | |
4327 ff_init_range_encoder(c, buf, buf_size); | |
4328 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
4329 pict->pict_type= FF_I_TYPE; | |
4330 s->keyframe=1; | |
4331 s->current_picture.key_frame=1; | |
4332 goto redo_frame; | |
2138 | 4333 } |
6414 | 4334 |
4335 if(s->qlog == LOSSLESS_QLOG){ | |
4336 for(y=0; y<h; y++){ | |
4337 for(x=0; x<w; x++){ | |
4338 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; | |
4339 } | |
2161 | 4340 } |
6414 | 4341 }else{ |
4342 for(y=0; y<h; y++){ | |
4343 for(x=0; x<w; x++){ | |
4344 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; | |
4345 } | |
5575 | 4346 } |
4347 } | |
6414 | 4348 |
4349 /* if(QUANTIZE2) | |
4350 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); | |
4351 else*/ | |
4352 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4353 | |
4354 if(s->pass1_rc && plane_index==0){ | |
4355 int delta_qlog = ratecontrol_1pass(s, pict); | |
4356 if (delta_qlog <= INT_MIN) | |
4357 return -1; | |
4358 if(delta_qlog){ | |
4359 //reordering qlog in the bitstream would eliminate this reset | |
4360 ff_init_range_encoder(c, buf, buf_size); | |
4361 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); | |
4362 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); | |
4363 encode_header(s); | |
4364 encode_blocks(s, 0); | |
2161 | 4365 } |
4366 } | |
6414 | 4367 |
4368 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4369 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4370 SubBand *b= &p->band[level][orientation]; | |
4371 | |
4372 if(!QUANTIZE2) | |
4373 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); | |
4374 if(orientation==0) | |
6481 | 4375 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0); |
6414 | 4376 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); |
4377 assert(b->parent==NULL || b->parent->stride == b->stride*2); | |
4378 if(orientation==0) | |
4379 correlate(s, b, b->ibuf, b->stride, 1, 0); | |
4380 } | |
4381 } | |
4382 | |
4383 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4384 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4385 SubBand *b= &p->band[level][orientation]; | |
4386 | |
4387 dequantize(s, b, b->ibuf, b->stride); | |
4388 } | |
4389 } | |
4390 | |
4391 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4392 if(s->qlog == LOSSLESS_QLOG){ | |
4393 for(y=0; y<h; y++){ | |
4394 for(x=0; x<w; x++){ | |
4395 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; | |
4396 } | |
4397 } | |
4398 } | |
4399 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4400 }else{ | |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4401 //ME/MC only |
6481 | 4402 if(pict->pict_type == FF_I_TYPE){ |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4403 for(y=0; y<h; y++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4404 for(x=0; x<w; x++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4405 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4406 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4407 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4408 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4409 }else{ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4410 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4411 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4412 } |
6414 | 4413 } |
2138 | 4414 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
4415 int64_t error= 0; | |
2967 | 4416 |
6414 | 4417 if(pict->data[plane_index]) //FIXME gray hack |
4418 for(y=0; y<h; y++){ | |
4419 for(x=0; x<w; x++){ | |
4420 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; | |
4421 error += d*d; | |
4422 } | |
2138 | 4423 } |
4424 s->avctx->error[plane_index] += error; | |
2232 | 4425 s->current_picture.error[plane_index] = error; |
2138 | 4426 } |
6414 | 4427 |
2138 | 4428 } |
4429 | |
6144 | 4430 update_last_header_values(s); |
4431 | |
9374 | 4432 release_buffer(avctx); |
2138 | 4433 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4434 s->current_picture.coded_picture_number = avctx->frame_number; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4435 s->current_picture.pict_type = pict->pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4436 s->current_picture.quality = pict->quality; |
3313 | 4437 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
4438 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; | |
4439 s->m.current_picture.display_picture_number = | |
4440 s->m.current_picture.coded_picture_number = avctx->frame_number; | |
4441 s->m.current_picture.quality = pict->quality; | |
4442 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); | |
4443 if(s->pass1_rc) | |
3766 | 4444 if (ff_rate_estimate_qscale(&s->m, 0) < 0) |
4445 return -1; | |
3313 | 4446 if(avctx->flags&CODEC_FLAG_PASS1) |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4447 ff_write_pass1_stats(&s->m); |
3313 | 4448 s->m.last_pict_type = s->m.pict_type; |
4123
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4449 avctx->frame_bits = s->m.frame_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4450 avctx->mv_bits = s->m.mv_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4451 avctx->misc_bits = s->m.misc_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4452 avctx->p_tex_bits = s->m.p_tex_bits; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4453 |
2138 | 4454 emms_c(); |
2967 | 4455 |
2335 | 4456 return ff_rac_terminate(c); |
2138 | 4457 } |
4458 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4459 static av_cold void common_end(SnowContext *s){ |
3314 | 4460 int plane_index, level, orientation, i; |
2192 | 4461 |
2138 | 4462 av_freep(&s->spatial_dwt_buffer); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4463 av_freep(&s->spatial_idwt_buffer); |
2138 | 4464 |
8239
dcfdb3352dde
Fix regression test failure with pthreads on multiprocessor systems.
michael
parents:
8214
diff
changeset
|
4465 s->m.me.temp= NULL; |
2967 | 4466 av_freep(&s->m.me.scratchpad); |
2138 | 4467 av_freep(&s->m.me.map); |
4468 av_freep(&s->m.me.score_map); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4469 av_freep(&s->m.obmc_scratchpad); |
2967 | 4470 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4471 av_freep(&s->block); |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
4472 av_freep(&s->scratchbuf); |
2192 | 4473 |
3314 | 4474 for(i=0; i<MAX_REF_FRAMES; i++){ |
4475 av_freep(&s->ref_mvs[i]); | |
4476 av_freep(&s->ref_scores[i]); | |
4477 if(s->last_picture[i].data[0]) | |
4478 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
4479 } | |
4480 | |
2967 | 4481 for(plane_index=0; plane_index<3; plane_index++){ |
2192 | 4482 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
4483 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4484 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 4485 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4486 av_freep(&b->x_coeff); |
2192 | 4487 } |
4488 } | |
4489 } | |
2138 | 4490 } |
4491 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4492 static av_cold int encode_end(AVCodecContext *avctx) |
2138 | 4493 { |
4494 SnowContext *s = avctx->priv_data; | |
4495 | |
4496 common_end(s); | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4497 av_free(avctx->stats_out); |
2138 | 4498 |
4499 return 0; | |
4500 } | |
4501 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4502 static av_cold int decode_init(AVCodecContext *avctx) |
2138 | 4503 { |
2635 | 4504 avctx->pix_fmt= PIX_FMT_YUV420P; |
2138 | 4505 |
4506 common_init(avctx); | |
2967 | 4507 |
2138 | 4508 return 0; |
4509 } | |
4510 | |
9355
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4511 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ |
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4512 const uint8_t *buf = avpkt->data; |
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4513 int buf_size = avpkt->size; |
2138 | 4514 SnowContext *s = avctx->priv_data; |
2335 | 4515 RangeCoder * const c= &s->c; |
2138 | 4516 int bytes_read; |
4517 AVFrame *picture = data; | |
9374 | 4518 int level, orientation, plane_index; |
2138 | 4519 |
2335 | 4520 ff_init_range_decoder(c, buf, buf_size); |
4521 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2138 | 4522 |
4523 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
5668 | 4524 if(decode_header(s)<0) |
4525 return -1; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4526 common_init_after_header(avctx); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4527 |
5662
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4528 // realloc slice buffer for the case that spatial_decomposition_count changed |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4529 slice_buffer_destroy(&s->sb); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4530 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4531 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4532 for(plane_index=0; plane_index<3; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4533 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4534 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4535 && p->hcoeff[1]==-10 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4536 && p->hcoeff[2]==2; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4537 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4538 |
9370 | 4539 alloc_blocks(s); |
2138 | 4540 |
9372
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4541 if(frame_start(s) < 0) |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4542 return -1; |
6412 | 4543 //keyframe flag duplication mess FIXME |
2138 | 4544 if(avctx->debug&FF_DEBUG_PICT_INFO) |
4545 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
2967 | 4546 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4547 decode_blocks(s); |
2138 | 4548 |
4549 for(plane_index=0; plane_index<3; plane_index++){ | |
4550 Plane *p= &s->plane[plane_index]; | |
4551 int w= p->width; | |
4552 int h= p->height; | |
4553 int x, y; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4554 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
2967 | 4555 |
6414 | 4556 if(s->avctx->debug&2048){ |
4557 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
4558 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4559 | |
4560 for(y=0; y<h; y++){ | |
4561 for(x=0; x<w; x++){ | |
4562 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | |
4563 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | |
4564 } | |
2138 | 4565 } |
4566 } | |
6414 | 4567 |
4568 { | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4569 for(level=0; level<s->spatial_decomposition_count; level++){ |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4570 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4571 SubBand *b= &p->band[level][orientation]; |
6414 | 4572 unpack_coeffs(s, b, b->parent, orientation); |
4573 } | |
4574 } | |
4575 } | |
4576 | |
4577 { | |
4578 const int mb_h= s->b_height << s->block_max_depth; | |
4579 const int block_size = MB_SIZE >> s->block_max_depth; | |
4580 const int block_w = plane_index ? block_size/2 : block_size; | |
4581 int mb_y; | |
8308 | 4582 DWTCompose cs[MAX_DECOMPOSITIONS]; |
6414 | 4583 int yd=0, yq=0; |
4584 int y; | |
4585 int end_y; | |
4586 | |
4587 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4588 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
4589 | |
4590 int slice_starty = block_w*mb_y; | |
4591 int slice_h = block_w*(mb_y+1); | |
4592 if (!(s->keyframe || s->avctx->debug&512)){ | |
4593 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | |
4594 slice_h -= (block_w >> 1); | |
4595 } | |
4596 | |
4597 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4598 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4599 SubBand *b= &p->band[level][orientation]; | |
4600 int start_y; | |
4601 int end_y; | |
4602 int our_mb_start = mb_y; | |
4603 int our_mb_end = (mb_y + 1); | |
4604 const int extra= 3; | |
4605 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
4606 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
4607 if (!(s->keyframe || s->avctx->debug&512)){ | |
4608 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
4609 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4610 } |
6414 | 4611 start_y = FFMIN(b->height, start_y); |
4612 end_y = FFMIN(b->height, end_y); | |
4613 | |
4614 if (start_y != end_y){ | |
4615 if (orientation == 0){ | |
4616 SubBand * correlate_band = &p->band[0][0]; | |
4617 int correlate_end_y = FFMIN(b->height, end_y + 1); | |
4618 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | |
4619 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | |
4620 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | |
4621 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); | |
4622 } | |
4623 else | |
4624 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
4625 } | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4626 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4627 } |
6414 | 4628 |
4629 for(; yd<slice_h; yd+=4){ | |
4630 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
4631 } | |
4632 | |
4633 if(s->qlog == LOSSLESS_QLOG){ | |
4634 for(; yq<slice_h && yq<h; yq++){ | |
4635 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
4636 for(x=0; x<w; x++){ | |
4637 line[x] <<= FRAC_BITS; | |
4638 } | |
2161 | 4639 } |
4640 } | |
6414 | 4641 |
4642 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); | |
4643 | |
4644 y = FFMIN(p->height, slice_starty); | |
4645 end_y = FFMIN(p->height, slice_h); | |
4646 while(y < end_y) | |
4647 slice_buffer_release(&s->sb, y++); | |
2161 | 4648 } |
2562 | 4649 |
6414 | 4650 slice_buffer_flush(&s->sb); |
4651 } | |
4652 | |
2138 | 4653 } |
2967 | 4654 |
2138 | 4655 emms_c(); |
4656 | |
9374 | 4657 release_buffer(avctx); |
2138 | 4658 |
6414 | 4659 if(!(s->avctx->debug&2048)) |
4660 *picture= s->current_picture; | |
4661 else | |
4662 *picture= s->mconly_picture; | |
2967 | 4663 |
2138 | 4664 *data_size = sizeof(AVFrame); |
2967 | 4665 |
2335 | 4666 bytes_read= c->bytestream - c->bytestream_start; |
4667 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
2138 | 4668 |
4669 return bytes_read; | |
4670 } | |
4671 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4672 static av_cold int decode_end(AVCodecContext *avctx) |
2138 | 4673 { |
4674 SnowContext *s = avctx->priv_data; | |
4675 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4676 slice_buffer_destroy(&s->sb); |
2967 | 4677 |
2138 | 4678 common_end(s); |
4679 | |
4680 return 0; | |
4681 } | |
4682 | |
4683 AVCodec snow_decoder = { | |
4684 "snow", | |
4685 CODEC_TYPE_VIDEO, | |
4686 CODEC_ID_SNOW, | |
4687 sizeof(SnowContext), | |
4688 decode_init, | |
4689 NULL, | |
4690 decode_end, | |
4691 decode_frame, | |
4692 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
6712 | 4693 NULL, |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6712
diff
changeset
|
4694 .long_name = NULL_IF_CONFIG_SMALL("Snow"), |
2138 | 4695 }; |
4696 | |
8590 | 4697 #if CONFIG_SNOW_ENCODER |
2138 | 4698 AVCodec snow_encoder = { |
4699 "snow", | |
4700 CODEC_TYPE_VIDEO, | |
4701 CODEC_ID_SNOW, | |
4702 sizeof(SnowContext), | |
4703 encode_init, | |
4704 encode_frame, | |
4705 encode_end, | |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6712
diff
changeset
|
4706 .long_name = NULL_IF_CONFIG_SMALL("Snow"), |
2138 | 4707 }; |
2408
a6e4da1c28ee
Disable encoders patch by (Gianluigi Tiesi <mplayer netfarm it>)
michael
parents:
2368
diff
changeset
|
4708 #endif |
2138 | 4709 |
4710 | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4711 #ifdef TEST |
2138 | 4712 #undef malloc |
4713 #undef free | |
4714 #undef printf | |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4715 |
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4716 #include "libavutil/lfg.h" |
2138 | 4717 |
5934 | 4718 int main(void){ |
2138 | 4719 int width=256; |
4720 int height=256; | |
4721 int buffer[2][width*height]; | |
4722 SnowContext s; | |
4723 int i; | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4724 AVLFG prng; |
2138 | 4725 s.spatial_decomposition_count=6; |
4726 s.spatial_decomposition_type=1; | |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4727 |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4728 av_lfg_init(&prng, 1); |
2967 | 4729 |
2138 | 4730 printf("testing 5/3 DWT\n"); |
4731 for(i=0; i<width*height; i++) | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4732 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; |
2967 | 4733 |
2951 | 4734 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4735 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4736 |
2138 | 4737 for(i=0; i<width*height; i++) |
9197 | 4738 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); |
2138 | 4739 |
4740 printf("testing 9/7 DWT\n"); | |
4741 s.spatial_decomposition_type=0; | |
4742 for(i=0; i<width*height; i++) | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4743 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; |
2967 | 4744 |
2951 | 4745 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4746 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4747 |
2138 | 4748 for(i=0; i<width*height; i++) |
9197 | 4749 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); |
2967 | 4750 |
2951 | 4751 #if 0 |
2138 | 4752 printf("testing AC coder\n"); |
4753 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4754 ff_init_range_encoder(&s.c, buffer[0], 256*256); |
2138 | 4755 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4756 |
2138 | 4757 for(i=-256; i<256; i++){ |
4001 | 4758 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); |
2138 | 4759 } |
2335 | 4760 ff_rac_terminate(&s.c); |
2138 | 4761 |
4762 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4763 ff_init_range_decoder(&s.c, buffer[0], 256*256); |
2138 | 4764 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4765 |
2138 | 4766 for(i=-256; i<256; i++){ |
4767 int j; | |
4768 j= get_symbol(&s.c, s.header_state, 1); | |
4001 | 4769 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); |
2138 | 4770 } |
2951 | 4771 #endif |
6414 | 4772 { |
4773 int level, orientation, x, y; | |
4774 int64_t errors[8][4]; | |
4775 int64_t g=0; | |
4776 | |
4777 memset(errors, 0, sizeof(errors)); | |
4778 s.spatial_decomposition_count=3; | |
4779 s.spatial_decomposition_type=0; | |
4780 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4781 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4782 int w= width >> (s.spatial_decomposition_count-level); | |
4783 int h= height >> (s.spatial_decomposition_count-level); | |
4784 int stride= width << (s.spatial_decomposition_count-level); | |
4785 DWTELEM *buf= buffer[0]; | |
4786 int64_t error=0; | |
4787 | |
4788 if(orientation&1) buf+=w; | |
4789 if(orientation>1) buf+=stride>>1; | |
4790 | |
4791 memset(buffer[0], 0, sizeof(int)*width*height); | |
4792 buf[w/2 + h/2*stride]= 256*256; | |
4793 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
4794 for(y=0; y<height; y++){ | |
4795 for(x=0; x<width; x++){ | |
4796 int64_t d= buffer[0][x + y*width]; | |
4797 error += d*d; | |
4798 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); | |
4799 } | |
4800 if(FFABS(height/2-y)<9 && level==2) printf("\n"); | |
2138 | 4801 } |
6414 | 4802 error= (int)(sqrt(error)+0.5); |
4803 errors[level][orientation]= error; | |
8611 | 4804 if(g) g=av_gcd(g, error); |
6414 | 4805 else g= error; |
2138 | 4806 } |
4807 } | |
6414 | 4808 printf("static int const visual_weight[][4]={\n"); |
4809 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4810 printf(" {"); | |
4811 for(orientation=0; orientation<4; orientation++){ | |
4812 printf("%8"PRId64",", errors[level][orientation]/g); | |
4813 } | |
4814 printf("},\n"); | |
2138 | 4815 } |
6414 | 4816 printf("};\n"); |
4817 { | |
2138 | 4818 int level=2; |
4819 int w= width >> (s.spatial_decomposition_count-level); | |
6168 | 4820 //int h= height >> (s.spatial_decomposition_count-level); |
2138 | 4821 int stride= width << (s.spatial_decomposition_count-level); |
4822 DWTELEM *buf= buffer[0]; | |
4823 int64_t error=0; | |
4824 | |
4825 buf+=w; | |
4826 buf+=stride>>1; | |
2967 | 4827 |
2138 | 4828 memset(buffer[0], 0, sizeof(int)*width*height); |
4829 #if 1 | |
4830 for(y=0; y<height; y++){ | |
4831 for(x=0; x<width; x++){ | |
4832 int tab[4]={0,2,3,1}; | |
4833 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; | |
4834 } | |
4835 } | |
2951 | 4836 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4837 #else |
4838 for(y=0; y<h; y++){ | |
4839 for(x=0; x<w; x++){ | |
4840 buf[x + y*stride ]=169; | |
4841 buf[x + y*stride-w]=64; | |
4842 } | |
4843 } | |
2951 | 4844 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4845 #endif |
4846 for(y=0; y<height; y++){ | |
4847 for(x=0; x<width; x++){ | |
4848 int64_t d= buffer[0][x + y*width]; | |
4849 error += d*d; | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4850 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); |
2138 | 4851 } |
4001 | 4852 if(FFABS(height/2-y)<9) printf("\n"); |
2138 | 4853 } |
6414 | 4854 } |
4855 | |
2138 | 4856 } |
4857 return 0; | |
4858 } | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4859 #endif /* TEST */ |