Mercurial > libavcodec.hg
annotate snow.c @ 5627:a920d9b58f19 libavcodec
Cosmetics
author | lu_zero |
---|---|
date | Sat, 01 Sep 2007 14:00:47 +0000 |
parents | 946c2db0a093 |
children | 873ea64637d9 |
rev | line source |
---|---|
2138 | 1 /* |
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2138 | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2138 | 10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2138 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3035
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2138 | 19 */ |
20 | |
21 #include "avcodec.h" | |
22 #include "dsputil.h" | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
23 #include "snow.h" |
2335 | 24 |
25 #include "rangecoder.h" | |
2138 | 26 |
27 #include "mpegvideo.h" | |
28 | |
29 #undef NDEBUG | |
30 #include <assert.h> | |
31 | |
32 static const int8_t quant3[256]={ | |
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
49 }; | |
50 static const int8_t quant3b[256]={ | |
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 }; | |
2596 | 68 static const int8_t quant3bA[256]={ |
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 }; | |
2138 | 86 static const int8_t quant5[256]={ |
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
103 }; | |
104 static const int8_t quant7[256]={ | |
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
121 }; | |
122 static const int8_t quant9[256]={ | |
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
139 }; | |
140 static const int8_t quant11[256]={ | |
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
157 }; | |
158 static const int8_t quant13[256]={ | |
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
175 }; | |
176 | |
177 #if 0 //64*cubic | |
178 static const uint8_t obmc32[1024]={ | |
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, | |
183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, | |
184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, | |
185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, | |
186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, | |
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, | |
188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, | |
189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, | |
190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, | |
191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, | |
192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, | |
193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, | |
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, | |
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, | |
196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, | |
197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, | |
198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, | |
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, | |
200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, | |
201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, | |
202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, | |
203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, | |
204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, | |
205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, | |
206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, | |
207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, | |
208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
211 //error:0.000022 | |
212 }; | |
213 static const uint8_t obmc16[256]={ | |
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, | |
216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, | |
217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, | |
218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, | |
219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, | |
220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, | |
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, | |
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, | |
223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, | |
224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, | |
225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, | |
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, | |
227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, | |
228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, | |
229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
230 //error:0.000033 | |
231 }; | |
232 #elif 1 // 64*linear | |
233 static const uint8_t obmc32[1024]={ | |
3206 | 234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, |
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
2138 | 266 //error:0.000020 |
267 }; | |
268 static const uint8_t obmc16[256]={ | |
3206 | 269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, |
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, | |
2138 | 285 //error:0.000015 |
286 }; | |
287 #else //64*cos | |
288 static const uint8_t obmc32[1024]={ | |
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, | |
293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, | |
294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, | |
295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, | |
296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, | |
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, | |
298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, | |
299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, | |
300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, | |
301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, | |
302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, | |
303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, | |
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, | |
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, | |
306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, | |
307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, | |
308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, | |
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, | |
310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, | |
311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, | |
312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, | |
313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, | |
314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, | |
315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, | |
316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, | |
317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, | |
318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, | |
319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
321 //error:0.000022 | |
322 }; | |
323 static const uint8_t obmc16[256]={ | |
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, | |
326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, | |
327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, | |
328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, | |
329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, | |
330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, | |
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, | |
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, | |
333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, | |
334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, | |
335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, | |
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, | |
337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, | |
338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, | |
339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
340 //error:0.000022 | |
341 }; | |
342 #endif | |
343 | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
344 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
345 static const uint8_t obmc8[64]={ |
3206 | 346 4, 12, 20, 28, 28, 20, 12, 4, |
347 12, 36, 60, 84, 84, 60, 36, 12, | |
348 20, 60,100,140,140,100, 60, 20, | |
349 28, 84,140,196,196,140, 84, 28, | |
350 28, 84,140,196,196,140, 84, 28, | |
351 20, 60,100,140,140,100, 60, 20, | |
352 12, 36, 60, 84, 84, 60, 36, 12, | |
353 4, 12, 20, 28, 28, 20, 12, 4, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
354 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
355 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
356 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
357 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
358 static const uint8_t obmc4[16]={ |
3206 | 359 16, 48, 48, 16, |
360 48,144,144, 48, | |
361 48,144,144, 48, | |
362 16, 48, 48, 16, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
363 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
364 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
365 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
366 static const uint8_t *obmc_tab[4]={ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
367 obmc32, obmc16, obmc8, obmc4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
368 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
369 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
371 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
372 typedef struct BlockNode{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
373 int16_t mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
374 int16_t my; |
3314 | 375 uint8_t ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
376 uint8_t color[3]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
377 uint8_t type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
378 //#define TYPE_SPLIT 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
379 #define BLOCK_INTRA 1 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
380 #define BLOCK_OPT 2 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
381 //#define TYPE_NOCOLOR 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
382 uint8_t level; //FIXME merge into type? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
383 }BlockNode; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
384 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
385 static const BlockNode null_block= { //FIXME add border maybe |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
386 .color= {128,128,128}, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
387 .mx= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
388 .my= 0, |
3314 | 389 .ref= 0, |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
390 .type= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
391 .level= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
392 }; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
393 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
394 #define LOG2_MB_SIZE 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
395 #define MB_SIZE (1<<LOG2_MB_SIZE) |
5575 | 396 #define ENCODER_EXTRA_BITS 4 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
397 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
398 typedef struct x_and_coeff{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
399 int16_t x; |
2596 | 400 uint16_t coeff; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
401 } x_and_coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
402 |
2138 | 403 typedef struct SubBand{ |
404 int level; | |
405 int stride; | |
406 int width; | |
407 int height; | |
408 int qlog; ///< log(qscale)/log[2^(1/6)] | |
409 DWTELEM *buf; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
410 IDWTELEM *ibuf; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
411 int buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
412 int buf_y_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
413 int stride_line; ///< Stride measured in lines, not pixels. |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
414 x_and_coeff * x_coeff; |
2138 | 415 struct SubBand *parent; |
416 uint8_t state[/*7*2*/ 7 + 512][32]; | |
417 }SubBand; | |
418 | |
419 typedef struct Plane{ | |
420 int width; | |
421 int height; | |
422 SubBand band[MAX_DECOMPOSITIONS][4]; | |
423 }Plane; | |
424 | |
425 typedef struct SnowContext{ | |
4588
fc155ff94878
cosmetics: Fix another common typo, dependAnt --> dependEnt.
diego
parents:
4494
diff
changeset
|
426 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 427 |
428 AVCodecContext *avctx; | |
2335 | 429 RangeCoder c; |
2138 | 430 DSPContext dsp; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
431 AVFrame new_picture; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
432 AVFrame input_picture; ///< new_picture with the internal linesizes |
2138 | 433 AVFrame current_picture; |
3314 | 434 AVFrame last_picture[MAX_REF_FRAMES]; |
2138 | 435 AVFrame mconly_picture; |
436 // uint8_t q_context[16]; | |
437 uint8_t header_state[32]; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
438 uint8_t block_state[128 + 32*128]; |
2138 | 439 int keyframe; |
2199 | 440 int always_reset; |
2138 | 441 int version; |
442 int spatial_decomposition_type; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
443 int last_spatial_decomposition_type; |
2138 | 444 int temporal_decomposition_type; |
445 int spatial_decomposition_count; | |
446 int temporal_decomposition_count; | |
3314 | 447 int max_ref_frames; |
448 int ref_frames; | |
449 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | |
450 uint32_t *ref_scores[MAX_REF_FRAMES]; | |
2138 | 451 DWTELEM *spatial_dwt_buffer; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
452 IDWTELEM *spatial_idwt_buffer; |
2138 | 453 int colorspace_type; |
454 int chroma_h_shift; | |
455 int chroma_v_shift; | |
456 int spatial_scalability; | |
457 int qlog; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
458 int last_qlog; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
459 int lambda; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
460 int lambda2; |
3313 | 461 int pass1_rc; |
2138 | 462 int mv_scale; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
463 int last_mv_scale; |
2138 | 464 int qbias; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
465 int last_qbias; |
2138 | 466 #define QBIAS_SHIFT 3 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
467 int b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
468 int b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
469 int block_max_depth; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
470 int last_block_max_depth; |
2138 | 471 Plane plane[MAX_PLANES]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
472 BlockNode *block; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
473 #define ME_CACHE_SIZE 1024 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
474 int me_cache[ME_CACHE_SIZE]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
475 int me_cache_generation; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
476 slice_buffer sb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
477 |
4588
fc155ff94878
cosmetics: Fix another common typo, dependAnt --> dependEnt.
diego
parents:
4494
diff
changeset
|
478 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 479 }SnowContext; |
480 | |
2562 | 481 typedef struct { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
482 IDWTELEM *b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
483 IDWTELEM *b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
484 IDWTELEM *b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
485 IDWTELEM *b3; |
2562 | 486 int y; |
487 } dwt_compose_t; | |
488 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
489 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
490 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
491 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
492 static void iterative_me(SnowContext *s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
493 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
494 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
495 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
496 int i; |
2967 | 497 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
498 buf->base_buffer = base_buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
499 buf->line_count = line_count; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
500 buf->line_width = line_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
501 buf->data_count = max_allocated_lines; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
502 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
503 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
2967 | 504 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
505 for (i = 0; i < max_allocated_lines; i++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
506 { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
507 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
508 } |
2967 | 509 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
510 buf->data_stack_top = max_allocated_lines - 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
511 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
512 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
513 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
514 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
515 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
516 IDWTELEM * buffer; |
2967 | 517 |
518 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); | |
519 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
520 assert(buf->data_stack_top >= 0); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
521 // assert(!buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
522 if (buf->line[line]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
523 return buf->line[line]; |
2967 | 524 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
525 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
526 buffer = buf->data_stack[buf->data_stack_top]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
527 buf->data_stack_top--; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
528 buf->line[line] = buffer; |
2967 | 529 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
530 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); |
2967 | 531 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
532 return buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
533 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
534 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
535 static void slice_buffer_release(slice_buffer * buf, int line) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
536 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
537 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
538 IDWTELEM * buffer; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
539 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
540 assert(line >= 0 && line < buf->line_count); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
541 assert(buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
542 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
543 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
544 buffer = buf->line[line]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
545 buf->data_stack_top++; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
546 buf->data_stack[buf->data_stack_top] = buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
547 buf->line[line] = NULL; |
2967 | 548 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
549 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
550 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
551 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
552 static void slice_buffer_flush(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
553 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
554 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
555 for (i = 0; i < buf->line_count; i++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
556 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
557 if (buf->line[i]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
558 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
559 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
560 slice_buffer_release(buf, i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
561 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
562 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
563 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
564 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
565 static void slice_buffer_destroy(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
566 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
567 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
568 slice_buffer_flush(buf); |
2967 | 569 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
570 for (i = buf->data_count - 1; i >= 0; i--) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
571 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
572 assert(buf->data_stack[i]); |
3190 | 573 av_freep(&buf->data_stack[i]); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
574 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
575 assert(buf->data_stack); |
3190 | 576 av_freep(&buf->data_stack); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
577 assert(buf->line); |
3190 | 578 av_freep(&buf->line); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
579 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
580 |
2979 | 581 #ifdef __sgi |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
582 // Avoid a name clash on SGI IRIX |
2979 | 583 #undef qexp |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
584 #endif |
2246 | 585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
2600 | 586 static uint8_t qexp[QROOT]; |
2138 | 587 |
588 static inline int mirror(int v, int m){ | |
2998 | 589 while((unsigned)v > (unsigned)m){ |
590 v=-v; | |
591 if(v<0) v+= 2*m; | |
592 } | |
593 return v; | |
2138 | 594 } |
595 | |
2335 | 596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
2138 | 597 int i; |
598 | |
599 if(v){ | |
4001 | 600 const int a= FFABS(v); |
2138 | 601 const int e= av_log2(a); |
602 #if 1 | |
2967 | 603 const int el= FFMIN(e, 10); |
2335 | 604 put_rac(c, state+0, 0); |
2138 | 605 |
606 for(i=0; i<el; i++){ | |
2335 | 607 put_rac(c, state+1+i, 1); //1..10 |
2138 | 608 } |
609 for(; i<e; i++){ | |
2335 | 610 put_rac(c, state+1+9, 1); //1..10 |
2138 | 611 } |
2335 | 612 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 613 |
614 for(i=e-1; i>=el; i--){ | |
2335 | 615 put_rac(c, state+22+9, (a>>i)&1); //22..31 |
2138 | 616 } |
617 for(; i>=0; i--){ | |
2335 | 618 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 619 } |
620 | |
621 if(is_signed) | |
2335 | 622 put_rac(c, state+11 + el, v < 0); //11..21 |
2138 | 623 #else |
2967 | 624 |
2335 | 625 put_rac(c, state+0, 0); |
2138 | 626 if(e<=9){ |
627 for(i=0; i<e; i++){ | |
2335 | 628 put_rac(c, state+1+i, 1); //1..10 |
2138 | 629 } |
2335 | 630 put_rac(c, state+1+i, 0); |
2138 | 631 |
632 for(i=e-1; i>=0; i--){ | |
2335 | 633 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 634 } |
635 | |
636 if(is_signed) | |
2335 | 637 put_rac(c, state+11 + e, v < 0); //11..21 |
2138 | 638 }else{ |
639 for(i=0; i<e; i++){ | |
2335 | 640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
2138 | 641 } |
2335 | 642 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 643 |
644 for(i=e-1; i>=0; i--){ | |
2335 | 645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
2138 | 646 } |
647 | |
648 if(is_signed) | |
2335 | 649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
2138 | 650 } |
651 #endif | |
652 }else{ | |
2335 | 653 put_rac(c, state+0, 1); |
2138 | 654 } |
655 } | |
656 | |
2335 | 657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
658 if(get_rac(c, state+0)) | |
2138 | 659 return 0; |
660 else{ | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
661 int i, e, a; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
662 e= 0; |
2335 | 663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
664 e++; |
2138 | 665 } |
666 | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
667 a= 1; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
668 for(i=e-1; i>=0; i--){ |
2335 | 669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
670 } |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
671 |
2335 | 672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
2138 | 673 return -a; |
674 else | |
675 return a; | |
676 } | |
677 } | |
678 | |
2335 | 679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
680 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
681 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
682 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
683 assert(v>=0); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
684 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
685 |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
686 while(v >= r){ |
2335 | 687 put_rac(c, state+4+log2, 1); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
688 v -= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
689 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
690 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
691 } |
2335 | 692 put_rac(c, state+4+log2, 0); |
2967 | 693 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
694 for(i=log2-1; i>=0; i--){ |
2335 | 695 put_rac(c, state+31-i, (v>>i)&1); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
696 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
697 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
698 |
2335 | 699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
700 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
701 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
702 int v=0; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
703 |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
704 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
705 |
2335 | 706 while(get_rac(c, state+4+log2)){ |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
707 v+= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
708 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
709 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
710 } |
2967 | 711 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
712 for(i=log2-1; i>=0; i--){ |
2335 | 713 v+= get_rac(c, state+31-i)<<i; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
714 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
715 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
716 return v; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
717 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
718 |
5627 | 719 static av_always_inline void |
720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
721 int dst_step, int src_step, int ref_step, | |
722 int width, int mul, int add, int shift, | |
723 int highpass, int inverse){ | |
2138 | 724 const int mirror_left= !highpass; |
725 const int mirror_right= (width&1) ^ highpass; | |
726 const int w= (width>>1) - 1 + (highpass & width); | |
727 int i; | |
728 | |
729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
730 if(mirror_left){ | |
731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
732 dst += dst_step; | |
733 src += src_step; | |
734 } | |
2967 | 735 |
2138 | 736 for(i=0; i<w; i++){ |
5627 | 737 dst[i*dst_step] = |
738 LIFT(src[i*src_step], | |
739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
740 inverse); | |
2138 | 741 } |
2967 | 742 |
2138 | 743 if(mirror_right){ |
5627 | 744 dst[w*dst_step] = |
745 LIFT(src[w*src_step], | |
746 ((mul*2*ref[w*ref_step]+add)>>shift), | |
747 inverse); | |
2138 | 748 } |
749 } | |
750 | |
5627 | 751 static av_always_inline void |
752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
753 int dst_step, int src_step, int ref_step, | |
754 int width, int mul, int add, int shift, | |
755 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
756 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
757 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
758 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
759 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
760 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
762 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
764 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
765 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
766 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
767 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
768 for(i=0; i<w; i++){ |
5627 | 769 dst[i*dst_step] = |
770 LIFT(src[i*src_step], | |
771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
772 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
773 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
774 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
775 if(mirror_right){ |
5627 | 776 dst[w*dst_step] = |
777 LIFT(src[w*src_step], | |
778 ((mul*2*ref[w*ref_step]+add)>>shift), | |
779 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
780 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
781 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
782 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
783 #ifndef liftS |
5627 | 784 static av_always_inline void |
785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
786 int dst_step, int src_step, int ref_step, | |
787 int width, int mul, int add, int shift, | |
788 int highpass, int inverse){ | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
789 const int mirror_left= !highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
790 const int mirror_right= (width&1) ^ highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
791 const int w= (width>>1) - 1 + (highpass & width); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
792 int i; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
793 |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
794 assert(shift == 4); |
5627 | 795 #define LIFTS(src, ref, inv) \ |
796 ((inv) ? \ | |
797 (src) + (((ref) + 4*(src))>>shift): \ | |
798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
799 if(mirror_left){ |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
801 dst += dst_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
802 src += src_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
803 } |
2967 | 804 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
805 for(i=0; i<w; i++){ |
5627 | 806 dst[i*dst_step] = |
807 LIFTS(src[i*src_step], | |
808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
809 inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
810 } |
2967 | 811 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
812 if(mirror_right){ |
5627 | 813 dst[w*dst_step] = |
814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
815 } |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
816 } |
5627 | 817 static av_always_inline void |
818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
819 int dst_step, int src_step, int ref_step, | |
820 int width, int mul, int add, int shift, | |
821 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
822 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
823 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
824 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
825 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
826 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
827 assert(shift == 4); |
5627 | 828 #define LIFTS(src, ref, inv) \ |
829 ((inv) ? \ | |
830 (src) + (((ref) + 4*(src))>>shift): \ | |
831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
832 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
834 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
835 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
836 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
837 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
838 for(i=0; i<w; i++){ |
5627 | 839 dst[i*dst_step] = |
840 LIFTS(src[i*src_step], | |
841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
842 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
843 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
844 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
845 if(mirror_right){ |
5627 | 846 dst[w*dst_step] = |
847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
848 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
849 } |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
850 #endif |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
851 |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
852 static void horizontal_decompose53i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
853 DWTELEM temp[width]; |
2138 | 854 const int width2= width>>1; |
2893 | 855 int x; |
2138 | 856 const int w2= (width+1)>>1; |
857 | |
858 for(x=0; x<width2; x++){ | |
859 temp[x ]= b[2*x ]; | |
860 temp[x+w2]= b[2*x + 1]; | |
861 } | |
862 if(width&1) | |
863 temp[x ]= b[2*x ]; | |
864 #if 0 | |
2893 | 865 { |
866 int A1,A2,A3,A4; | |
2138 | 867 A2= temp[1 ]; |
868 A4= temp[0 ]; | |
869 A1= temp[0+width2]; | |
870 A1 -= (A2 + A4)>>1; | |
871 A4 += (A1 + 1)>>1; | |
872 b[0+width2] = A1; | |
873 b[0 ] = A4; | |
874 for(x=1; x+1<width2; x+=2){ | |
875 A3= temp[x+width2]; | |
876 A4= temp[x+1 ]; | |
877 A3 -= (A2 + A4)>>1; | |
878 A2 += (A1 + A3 + 2)>>2; | |
879 b[x+width2] = A3; | |
880 b[x ] = A2; | |
881 | |
882 A1= temp[x+1+width2]; | |
883 A2= temp[x+2 ]; | |
884 A1 -= (A2 + A4)>>1; | |
885 A4 += (A1 + A3 + 2)>>2; | |
886 b[x+1+width2] = A1; | |
887 b[x+1 ] = A4; | |
888 } | |
889 A3= temp[width-1]; | |
890 A3 -= A2; | |
891 A2 += (A1 + A3 + 2)>>2; | |
892 b[width -1] = A3; | |
893 b[width2-1] = A2; | |
2893 | 894 } |
2967 | 895 #else |
2138 | 896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
898 #endif | |
899 } | |
900 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 902 int i; |
2967 | 903 |
2138 | 904 for(i=0; i<width; i++){ |
905 b1[i] -= (b0[i] + b2[i])>>1; | |
906 } | |
907 } | |
908 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 910 int i; |
2967 | 911 |
2138 | 912 for(i=0; i<width; i++){ |
913 b1[i] += (b0[i] + b2[i] + 2)>>2; | |
914 } | |
915 } | |
916 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 918 int y; |
2138 | 919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
2967 | 921 |
2138 | 922 for(y=-2; y<height; y+=2){ |
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
925 | |
926 {START_TIMER | |
2998 | 927 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
928 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
2138 | 929 STOP_TIMER("horizontal_decompose53i")} |
2967 | 930 |
2138 | 931 {START_TIMER |
2998 | 932 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
933 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
2138 | 934 STOP_TIMER("vertical_decompose53i*")} |
2967 | 935 |
2138 | 936 b0=b2; |
937 b1=b3; | |
938 } | |
939 } | |
940 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
941 static void horizontal_decompose97i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
942 DWTELEM temp[width]; |
2138 | 943 const int w2= (width+1)>>1; |
944 | |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
945 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
946 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
947 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
2138 | 948 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
949 } | |
950 | |
951 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
952 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 953 int i; |
2967 | 954 |
2138 | 955 for(i=0; i<width; i++){ |
956 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
957 } | |
958 } | |
959 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
960 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 961 int i; |
2967 | 962 |
2138 | 963 for(i=0; i<width; i++){ |
964 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
965 } | |
966 } | |
967 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
968 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 969 int i; |
2967 | 970 |
2138 | 971 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
972 #ifdef liftS |
2138 | 973 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
974 #else |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
975 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
976 #endif |
2138 | 977 } |
978 } | |
979 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
980 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 981 int i; |
2967 | 982 |
2138 | 983 for(i=0; i<width; i++){ |
984 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
985 } | |
986 } | |
987 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
988 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 989 int y; |
2138 | 990 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
991 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
992 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
993 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
2967 | 994 |
2138 | 995 for(y=-4; y<height; y+=2){ |
996 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
997 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
998 | |
999 {START_TIMER | |
2998 | 1000 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
1001 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
2138 | 1002 if(width>400){ |
1003 STOP_TIMER("horizontal_decompose97i") | |
1004 }} | |
2967 | 1005 |
2138 | 1006 {START_TIMER |
2998 | 1007 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
1008 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
1009 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
1010 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
2138 | 1011 |
1012 if(width>400){ | |
1013 STOP_TIMER("vertical_decompose97i") | |
1014 }} | |
2967 | 1015 |
2138 | 1016 b0=b2; |
1017 b1=b3; | |
1018 b2=b4; | |
1019 b3=b5; | |
1020 } | |
1021 } | |
1022 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
1023 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2138 | 1024 int level; |
2967 | 1025 |
2164 | 1026 for(level=0; level<decomposition_count; level++){ |
1027 switch(type){ | |
3326 | 1028 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1029 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
2138 | 1030 } |
1031 } | |
1032 } | |
1033 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1034 static void horizontal_compose53i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1035 IDWTELEM temp[width]; |
2138 | 1036 const int width2= width>>1; |
1037 const int w2= (width+1)>>1; | |
2893 | 1038 int x; |
2138 | 1039 |
1040 #if 0 | |
2893 | 1041 int A1,A2,A3,A4; |
2138 | 1042 A2= temp[1 ]; |
1043 A4= temp[0 ]; | |
1044 A1= temp[0+width2]; | |
1045 A1 -= (A2 + A4)>>1; | |
1046 A4 += (A1 + 1)>>1; | |
1047 b[0+width2] = A1; | |
1048 b[0 ] = A4; | |
1049 for(x=1; x+1<width2; x+=2){ | |
1050 A3= temp[x+width2]; | |
1051 A4= temp[x+1 ]; | |
1052 A3 -= (A2 + A4)>>1; | |
1053 A2 += (A1 + A3 + 2)>>2; | |
1054 b[x+width2] = A3; | |
1055 b[x ] = A2; | |
1056 | |
1057 A1= temp[x+1+width2]; | |
1058 A2= temp[x+2 ]; | |
1059 A1 -= (A2 + A4)>>1; | |
1060 A4 += (A1 + A3 + 2)>>2; | |
1061 b[x+1+width2] = A1; | |
1062 b[x+1 ] = A4; | |
1063 } | |
1064 A3= temp[width-1]; | |
1065 A3 -= A2; | |
1066 A2 += (A1 + A3 + 2)>>2; | |
1067 b[width -1] = A3; | |
1068 b[width2-1] = A2; | |
2967 | 1069 #else |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1070 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1071 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
2138 | 1072 #endif |
1073 for(x=0; x<width2; x++){ | |
1074 b[2*x ]= temp[x ]; | |
1075 b[2*x + 1]= temp[x+w2]; | |
1076 } | |
1077 if(width&1) | |
1078 b[2*x ]= temp[x ]; | |
1079 } | |
1080 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1081 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1082 int i; |
2967 | 1083 |
2138 | 1084 for(i=0; i<width; i++){ |
1085 b1[i] += (b0[i] + b2[i])>>1; | |
1086 } | |
1087 } | |
1088 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1089 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1090 int i; |
2967 | 1091 |
2138 | 1092 for(i=0; i<width; i++){ |
1093 b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1094 } | |
1095 } | |
1096 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1097 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1098 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1099 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1100 cs->y = -1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1101 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1102 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1103 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1104 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1105 cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1106 cs->y = -1; | |
1107 } | |
1108 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1109 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1110 int y= cs->y; |
2967 | 1111 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1112 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1113 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1114 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1115 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1116 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1117 {START_TIMER |
2998 | 1118 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1119 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1120 STOP_TIMER("vertical_compose53i*")} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1121 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1122 {START_TIMER |
2998 | 1123 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1124 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1125 STOP_TIMER("horizontal_compose53i")} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1126 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1127 cs->b0 = b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1128 cs->b1 = b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1129 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1130 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1131 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1132 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1133 int y= cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1134 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1135 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1136 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1137 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
2138 | 1138 |
1139 {START_TIMER | |
2998 | 1140 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1141 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
2138 | 1142 STOP_TIMER("vertical_compose53i*")} |
1143 | |
1144 {START_TIMER | |
2998 | 1145 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1146 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2138 | 1147 STOP_TIMER("horizontal_compose53i")} |
1148 | |
2562 | 1149 cs->b0 = b2; |
1150 cs->b1 = b3; | |
1151 cs->y += 2; | |
1152 } | |
1153 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1154 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1155 dwt_compose_t cs; |
1156 spatial_compose53i_init(&cs, buffer, height, stride); | |
1157 while(cs.y <= height) | |
1158 spatial_compose53i_dy(&cs, buffer, width, height, stride); | |
2967 | 1159 } |
1160 | |
1161 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1162 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1163 IDWTELEM temp[width]; |
2138 | 1164 const int w2= (width+1)>>1; |
1165 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1166 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
1167 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1168 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1169 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
2138 | 1170 } |
1171 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1172 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1173 int i; |
2967 | 1174 |
2138 | 1175 for(i=0; i<width; i++){ |
1176 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1177 } | |
1178 } | |
1179 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1180 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1181 int i; |
2967 | 1182 |
2138 | 1183 for(i=0; i<width; i++){ |
1184 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1185 } | |
1186 } | |
1187 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1188 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1189 int i; |
2967 | 1190 |
2138 | 1191 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1192 #ifdef liftS |
2138 | 1193 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1194 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1195 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1196 #endif |
2138 | 1197 } |
1198 } | |
1199 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1200 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1201 int i; |
2967 | 1202 |
2138 | 1203 for(i=0; i<width; i++){ |
1204 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1205 } | |
1206 } | |
1207 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1208 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
2592 | 1209 int i; |
2967 | 1210 |
2592 | 1211 for(i=0; i<width; i++){ |
1212 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
1213 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1214 #ifdef liftS |
2592 | 1215 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1216 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1217 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1218 #endif |
2592 | 1219 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1220 } | |
1221 } | |
1222 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1223 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1224 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1225 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1226 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1227 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1228 cs->y = -3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1229 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1230 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1231 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1232 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1233 cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1234 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1235 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1236 cs->y = -3; | |
1237 } | |
2138 | 1238 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1239 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1240 int y = cs->y; |
2967 | 1241 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1242 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1243 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1244 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1245 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1246 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1247 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
2967 | 1248 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1249 {START_TIMER |
2592 | 1250 if(y>0 && y+4<height){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1251 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
2592 | 1252 }else{ |
2998 | 1253 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1254 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1255 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1256 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2592 | 1257 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1258 if(width>400){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1259 STOP_TIMER("vertical_compose97i")}} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1260 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1261 {START_TIMER |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1262 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1263 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); |
3012 | 1264 if(width>400 && y+0<(unsigned)height){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1265 STOP_TIMER("horizontal_compose97i")}} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1266 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1267 cs->b0=b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1268 cs->b1=b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1269 cs->b2=b4; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1270 cs->b3=b5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1271 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1272 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1273 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1274 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1275 int y = cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1276 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1277 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1278 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1279 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1280 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1281 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
2138 | 1282 |
1283 {START_TIMER | |
2998 | 1284 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1285 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1286 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1287 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2138 | 1288 if(width>400){ |
1289 STOP_TIMER("vertical_compose97i")}} | |
1290 | |
1291 {START_TIMER | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1292 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1293 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); |
2138 | 1294 if(width>400 && b0 <= b2){ |
1295 STOP_TIMER("horizontal_compose97i")}} | |
2562 | 1296 |
1297 cs->b0=b2; | |
1298 cs->b1=b3; | |
1299 cs->b2=b4; | |
1300 cs->b3=b5; | |
1301 cs->y += 2; | |
1302 } | |
1303 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1304 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1305 dwt_compose_t cs; |
1306 spatial_compose97i_init(&cs, buffer, height, stride); | |
1307 while(cs.y <= height) | |
1308 spatial_compose97i_dy(&cs, buffer, width, height, stride); | |
1309 } | |
1310 | |
3075 | 1311 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1312 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1313 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1314 switch(type){ |
3326 | 1315 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
1316 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1317 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1318 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1319 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1320 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1321 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1322 int level; |
1323 for(level=decomposition_count-1; level>=0; level--){ | |
1324 switch(type){ | |
3326 | 1325 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1326 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
2562 | 1327 } |
1328 } | |
1329 } | |
1330 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1331 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
2562 | 1332 const int support = type==1 ? 3 : 5; |
1333 int level; | |
1334 if(type==2) return; | |
1335 | |
1336 for(level=decomposition_count-1; level>=0; level--){ | |
1337 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1338 switch(type){ | |
3326 | 1339 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
2562 | 1340 break; |
3326 | 1341 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
2562 | 1342 break; |
1343 } | |
1344 } | |
2138 | 1345 } |
1346 } | |
1347 | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1348 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1349 const int support = type==1 ? 3 : 5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1350 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1351 if(type==2) return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1352 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1353 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1354 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1355 switch(type){ |
3326 | 1356 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1357 break; |
3326 | 1358 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1359 break; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1360 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1361 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1362 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1363 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1364 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1365 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1366 dwt_compose_t cs[MAX_DECOMPOSITIONS]; |
1367 int y; | |
1368 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1369 for(y=0; y<height; y+=4) | |
1370 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
2138 | 1371 } |
1372 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1373 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1374 const int w= b->width; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1375 const int h= b->height; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1376 int x, y; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1377 |
2138 | 1378 if(1){ |
1379 int run=0; | |
2149 | 1380 int runs[w*h]; |
2138 | 1381 int run_index=0; |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1382 int max_index; |
2967 | 1383 |
2138 | 1384 for(y=0; y<h; y++){ |
1385 for(x=0; x<w; x++){ | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1386 int v, p=0; |
2144 | 1387 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1388 v= src[x + y*stride]; |
2138 | 1389 |
1390 if(y){ | |
2149 | 1391 t= src[x + (y-1)*stride]; |
2138 | 1392 if(x){ |
2149 | 1393 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1394 } |
1395 if(x + 1 < w){ | |
2149 | 1396 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1397 } |
1398 } | |
1399 if(x){ | |
2149 | 1400 l= src[x - 1 + y*stride]; |
2144 | 1401 /*if(x > 1){ |
1402 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1403 else ll= src[x - 2 + y*stride]; | |
2138 | 1404 }*/ |
1405 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1406 if(parent){ |
2149 | 1407 int px= x>>1; |
1408 int py= y>>1; | |
2967 | 1409 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1410 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1411 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1412 if(!(/*ll|*/l|lt|t|rt|p)){ |
2138 | 1413 if(v){ |
1414 runs[run_index++]= run; | |
1415 run=0; | |
1416 }else{ | |
1417 run++; | |
1418 } | |
1419 } | |
1420 } | |
1421 } | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1422 max_index= run_index; |
2138 | 1423 runs[run_index++]= run; |
1424 run_index=0; | |
1425 run= runs[run_index++]; | |
1426 | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1427 put_symbol2(&s->c, b->state[30], max_index, 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1428 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1429 put_symbol2(&s->c, b->state[1], run, 3); |
2967 | 1430 |
2138 | 1431 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
1432 if(s->c.bytestream_end - s->c.bytestream < w*40){ |
2422 | 1433 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1434 return -1; | |
1435 } | |
2138 | 1436 for(x=0; x<w; x++){ |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1437 int v, p=0; |
2144 | 1438 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1439 v= src[x + y*stride]; |
2138 | 1440 |
1441 if(y){ | |
2149 | 1442 t= src[x + (y-1)*stride]; |
2138 | 1443 if(x){ |
2149 | 1444 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1445 } |
1446 if(x + 1 < w){ | |
2149 | 1447 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1448 } |
1449 } | |
1450 if(x){ | |
2149 | 1451 l= src[x - 1 + y*stride]; |
2144 | 1452 /*if(x > 1){ |
1453 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1454 else ll= src[x - 2 + y*stride]; | |
2138 | 1455 }*/ |
1456 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1457 if(parent){ |
2149 | 1458 int px= x>>1; |
1459 int py= y>>1; | |
2967 | 1460 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1461 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1462 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1463 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1464 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
2144 | 1465 |
2335 | 1466 put_rac(&s->c, &b->state[0][context], !!v); |
2138 | 1467 }else{ |
1468 if(!run){ | |
1469 run= runs[run_index++]; | |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1470 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1471 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1472 put_symbol2(&s->c, b->state[1], run, 3); |
2138 | 1473 assert(v); |
1474 }else{ | |
1475 run--; | |
1476 assert(!v); | |
1477 } | |
1478 } | |
1479 if(v){ | |
4001 | 1480 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
1481 int l2= 2*FFABS(l) + (l<0); | |
1482 int t2= 2*FFABS(t) + (t<0); | |
1483 | |
1484 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
2596 | 1485 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
2138 | 1486 } |
1487 } | |
1488 } | |
1489 } | |
2422 | 1490 return 0; |
2138 | 1491 } |
1492 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1493 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1494 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1495 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
2422 | 1496 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1497 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1498 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1499 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1500 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
2138 | 1501 const int w= b->width; |
1502 const int h= b->height; | |
1503 int x,y; | |
2967 | 1504 |
2138 | 1505 if(1){ |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1506 int run, runs; |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1507 x_and_coeff *xc= b->x_coeff; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1508 x_and_coeff *prev_xc= NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1509 x_and_coeff *prev2_xc= xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1510 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1511 x_and_coeff *prev_parent_xc= parent_xc; |
2138 | 1512 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1513 runs= get_symbol2(&s->c, b->state[30], 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1514 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1515 else run= INT_MAX; |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1516 |
2138 | 1517 for(y=0; y<h; y++){ |
2193 | 1518 int v=0; |
1519 int lt=0, t=0, rt=0; | |
1520 | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1521 if(y && prev_xc->x == 0){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1522 rt= prev_xc->coeff; |
2193 | 1523 } |
2138 | 1524 for(x=0; x<w; x++){ |
2193 | 1525 int p=0; |
1526 const int l= v; | |
2967 | 1527 |
2193 | 1528 lt= t; t= rt; |
1529 | |
2194 | 1530 if(y){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1531 if(prev_xc->x <= x) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1532 prev_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1533 if(prev_xc->x == x + 1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1534 rt= prev_xc->coeff; |
2194 | 1535 else |
1536 rt=0; | |
1537 } | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1538 if(parent_xc){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1539 if(x>>1 > parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1540 parent_xc++; |
2192 | 1541 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1542 if(x>>1 == parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1543 p= parent_xc->coeff; |
2194 | 1544 } |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1545 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1546 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1547 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
2144 | 1548 |
2335 | 1549 v=get_rac(&s->c, &b->state[0][context]); |
2605 | 1550 if(v){ |
1551 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1552 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
2967 | 1553 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1554 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1555 (xc++)->coeff= v; |
2605 | 1556 } |
2138 | 1557 }else{ |
1558 if(!run){ | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1559 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1560 else run= INT_MAX; |
2605 | 1561 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
1562 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
2967 | 1563 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1564 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1565 (xc++)->coeff= v; |
2138 | 1566 }else{ |
2606 | 1567 int max_run; |
2138 | 1568 run--; |
1569 v=0; | |
2191 | 1570 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1571 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
2606 | 1572 else max_run= FFMIN(run, w-x-1); |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1573 if(parent_xc) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1574 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); |
2606 | 1575 x+= max_run; |
1576 run-= max_run; | |
2138 | 1577 } |
1578 } | |
2192 | 1579 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1580 (xc++)->x= w+1; //end marker |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1581 prev_xc= prev2_xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1582 prev2_xc= xc; |
2967 | 1583 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1584 if(parent_xc){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1585 if(y&1){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1586 while(parent_xc->x != parent->width+1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1587 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1588 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1589 prev_parent_xc= parent_xc; |
2192 | 1590 }else{ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1591 parent_xc= prev_parent_xc; |
2138 | 1592 } |
1593 } | |
1594 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1595 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1596 (xc++)->x= w+1; //end marker |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1597 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1598 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1599 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1600 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1601 const int w= b->width; |
2893 | 1602 int y; |
4594 | 1603 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 1604 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1605 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1606 int new_index = 0; |
2967 | 1607 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1608 START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1609 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1610 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1611 qadd= 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1612 qmul= 1<<QEXPSHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1613 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1614 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1615 /* If we are on the second or later slice, restore our index. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1616 if (start_y != 0) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1617 new_index = save_state[0]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1618 |
2967 | 1619 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1620 for(y=start_y; y<h; y++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1621 int x = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1622 int v; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1623 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1624 memset(line, 0, b->width*sizeof(IDWTELEM)); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1625 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1626 x = b->x_coeff[new_index++].x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1627 while(x < w) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1628 { |
2596 | 1629 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1630 register int u= -(v&1); | |
1631 line[x] = (t^u) - u; | |
1632 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1633 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1634 x = b->x_coeff[new_index++].x; |
2138 | 1635 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1636 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1637 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1638 STOP_TIMER("decode_subband") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1639 } |
2967 | 1640 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1641 /* Save our variables for the next slice. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1642 save_state[0] = new_index; |
2967 | 1643 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1644 return; |
2138 | 1645 } |
1646 | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
1647 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts |
2138 | 1648 int plane_index, level, orientation; |
1649 | |
2199 | 1650 for(plane_index=0; plane_index<3; plane_index++){ |
2138 | 1651 for(level=0; level<s->spatial_decomposition_count; level++){ |
1652 for(orientation=level ? 1:0; orientation<4; orientation++){ | |
2335 | 1653 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
2138 | 1654 } |
1655 } | |
1656 } | |
2335 | 1657 memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1658 memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1659 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1660 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1661 static int alloc_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1662 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1663 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); |
2967 | 1664 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1665 s->b_width = w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1666 s->b_height= h; |
2967 | 1667 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1668 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1669 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1670 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1671 |
2335 | 1672 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1673 uint8_t *bytestream= d->bytestream; | |
1674 uint8_t *bytestream_start= d->bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1675 *d= *s; |
2335 | 1676 d->bytestream= bytestream; |
1677 d->bytestream_start= bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1678 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1679 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1680 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1681 static int pix_sum(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1682 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1683 int s, i, j; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1684 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1685 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1686 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1687 for (j = 0; j < w; j++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1688 s += pix[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1689 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1690 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1691 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1692 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1693 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1694 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1695 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1696 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1697 static int pix_norm1(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1698 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1699 int s, i, j; |
4179 | 1700 uint32_t *sq = ff_squareTbl + 256; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1701 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1702 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1703 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1704 for (j = 0; j < w; j ++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1705 s += sq[pix[0]]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1706 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1707 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1708 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1709 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1710 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1711 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1712 |
3314 | 1713 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1714 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1715 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1716 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1717 const int block_w= 1<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1718 BlockNode block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1719 int i,j; |
2967 | 1720 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1721 block.color[0]= l; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1722 block.color[1]= cb; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1723 block.color[2]= cr; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1724 block.mx= mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1725 block.my= my; |
3314 | 1726 block.ref= ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1727 block.type= type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1728 block.level= level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1729 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1730 for(j=0; j<block_w; j++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1731 for(i=0; i<block_w; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1732 s->block[index + i + j*w]= block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1733 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1734 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1735 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1736 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1737 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1738 const int offset[3]= { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1739 y*c-> stride + x, |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1740 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1741 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1742 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1743 int i; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1744 for(i=0; i<3; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1745 c->src[0][i]= src [i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1746 c->ref[0][i]= ref [i] + offset[i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1747 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1748 assert(!ref_index); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1749 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1750 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1751 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1752 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1753 if(s->ref_frames == 1){ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1754 *mx = mid_pred(left->mx, top->mx, tr->mx); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1755 *my = mid_pred(left->my, top->my, tr->my); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1756 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1757 const int *scale = scale_mv_ref[ref]; |
4407
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1758 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1759 (top ->mx * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1760 (tr ->mx * scale[tr ->ref] + 128) >>8); |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1761 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1762 (top ->my * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1763 (tr ->my * scale[tr ->ref] + 128) >>8); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1764 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1765 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1766 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1767 //FIXME copy&paste |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1768 #define P_LEFT P[1] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1769 #define P_TOP P[2] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1770 #define P_TOPRIGHT P[3] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1771 #define P_MEDIAN P[4] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1772 #define P_MV1 P[9] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1773 #define FLAG_QPEL 1 //must be 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1774 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1775 static int encode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1776 uint8_t p_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1777 uint8_t i_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1778 uint8_t p_state[sizeof(s->block_state)]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1779 uint8_t i_state[sizeof(s->block_state)]; |
2335 | 1780 RangeCoder pc, ic; |
1781 uint8_t *pbbak= s->c.bytestream; | |
1782 uint8_t *pbbak_start= s->c.bytestream_start; | |
5082 | 1783 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1784 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1785 const int h= s->b_height << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1786 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1787 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1788 const int block_w= 1<<(LOG2_MB_SIZE - level); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1789 int trx= (x+1)<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1790 int try= (y+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1791 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1792 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1793 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1794 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1795 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1796 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1797 int pl = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1798 int pcb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1799 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1800 int pmx, pmy; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1801 int mx=0, my=0; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1802 int l,cr,cb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1803 const int stride= s->current_picture.linesize[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1804 const int uvstride= s->current_picture.linesize[1]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1805 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1806 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1807 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1808 int P[10][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1809 int16_t last_mv[3][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1810 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1811 const int shift= 1+qpel; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1812 MotionEstContext *c= &s->m.me; |
3314 | 1813 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1814 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
1815 int my_context= av_log2(2*FFABS(left->my - top->my)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1816 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
3314 | 1817 int ref, best_ref, ref_score, ref_mx, ref_my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1818 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1819 assert(sizeof(s->block_state) >= 256); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1820 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1821 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1822 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1823 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1824 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1825 // clip predictors / edge ? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1826 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1827 P_LEFT[0]= left->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1828 P_LEFT[1]= left->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1829 P_TOP [0]= top->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1830 P_TOP [1]= top->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1831 P_TOPRIGHT[0]= tr->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1832 P_TOPRIGHT[1]= tr->my; |
2967 | 1833 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1834 last_mv[0][0]= s->block[index].mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1835 last_mv[0][1]= s->block[index].my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1836 last_mv[1][0]= right->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1837 last_mv[1][1]= right->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1838 last_mv[2][0]= bottom->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1839 last_mv[2][1]= bottom->my; |
2967 | 1840 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1841 s->m.mb_stride=2; |
2967 | 1842 s->m.mb_x= |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1843 s->m.mb_y= 0; |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1844 c->skip= 0; |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1845 |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1846 assert(c-> stride == stride); |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1847 assert(c->uvstride == uvstride); |
2967 | 1848 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1849 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1850 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1851 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1852 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; |
2967 | 1853 |
2206 | 1854 c->xmin = - x*block_w - 16+2; |
1855 c->ymin = - y*block_w - 16+2; | |
1856 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
1857 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1858 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1859 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); |
2967 | 1860 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1861 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1862 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1863 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1864 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1865 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1866 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1867 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1868 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1869 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1870 if (!y) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1871 c->pred_x= P_LEFT[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1872 c->pred_y= P_LEFT[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1873 } else { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1874 c->pred_x = P_MEDIAN[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1875 c->pred_y = P_MEDIAN[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1876 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1877 |
3314 | 1878 score= INT_MAX; |
1879 best_ref= 0; | |
1880 for(ref=0; ref<s->ref_frames; ref++){ | |
1881 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); | |
1882 | |
1883 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, | |
1884 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
1885 | |
1886 assert(ref_mx >= c->xmin); | |
1887 assert(ref_mx <= c->xmax); | |
1888 assert(ref_my >= c->ymin); | |
1889 assert(ref_my <= c->ymax); | |
1890 | |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1891 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
3314 | 1892 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); |
1893 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; | |
1894 if(s->ref_mvs[ref]){ | |
1895 s->ref_mvs[ref][index][0]= ref_mx; | |
1896 s->ref_mvs[ref][index][1]= ref_my; | |
1897 s->ref_scores[ref][index]= ref_score; | |
1898 } | |
1899 if(score > ref_score){ | |
1900 score= ref_score; | |
1901 best_ref= ref; | |
1902 mx= ref_mx; | |
1903 my= ref_my; | |
1904 } | |
1905 } | |
5127 | 1906 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 |
2967 | 1907 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1908 // subpel search |
5085 | 1909 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1910 pc= s->c; |
2335 | 1911 pc.bytestream_start= |
1912 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1913 memcpy(p_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1914 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1915 if(level!=s->block_max_depth) |
2335 | 1916 put_rac(&pc, &p_state[4 + s_context], 1); |
1917 put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
3314 | 1918 if(s->ref_frames > 1) |
1919 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1920 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); |
3314 | 1921 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); |
1922 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); | |
2335 | 1923 p_len= pc.bytestream - pc.bytestream_start; |
5082 | 1924 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1925 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1926 block_s= block_w*block_w; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1927 sum = pix_sum(current_data[0], stride, block_w); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1928 l= (sum + block_s/2)/block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1929 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
2967 | 1930 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1931 block_s= block_w*block_w>>2; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1932 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1933 cb= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1934 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1935 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1936 cr= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1937 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1938 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1939 ic= s->c; |
2335 | 1940 ic.bytestream_start= |
1941 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1942 memcpy(i_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1943 if(level!=s->block_max_depth) |
2335 | 1944 put_rac(&ic, &i_state[4 + s_context], 1); |
1945 put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1946 put_symbol(&ic, &i_state[32], l-pl , 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1947 put_symbol(&ic, &i_state[64], cb-pcb, 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1948 put_symbol(&ic, &i_state[96], cr-pcr, 1); |
2335 | 1949 i_len= ic.bytestream - ic.bytestream_start; |
5082 | 1950 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1951 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1952 // assert(score==256*256*256*64-1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1953 assert(iscore < 255*255*256 + s->lambda2*10); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1954 assert(iscore >= 0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1955 assert(l>=0 && l<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1956 assert(pl>=0 && pl<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1957 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1958 if(level==0){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1959 int varc= iscore >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1960 int vard= score >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1961 if (vard <= 64 || vard < varc) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1962 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1963 else |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1964 c->scene_change_score+= s->m.qscale; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1965 } |
2967 | 1966 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1967 if(level!=s->block_max_depth){ |
2335 | 1968 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1969 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1970 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1971 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1972 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1973 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead |
2967 | 1974 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1975 if(score2 < score && score2 < iscore) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1976 return score2; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1977 } |
2967 | 1978 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1979 if(iscore < score){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1980 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2335 | 1981 memcpy(pbbak, i_buffer, i_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1982 s->c= ic; |
2335 | 1983 s->c.bytestream_start= pbbak_start; |
1984 s->c.bytestream= pbbak + i_len; | |
3314 | 1985 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1986 memcpy(s->block_state, i_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1987 return iscore; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1988 }else{ |
2335 | 1989 memcpy(pbbak, p_buffer, p_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1990 s->c= pc; |
2335 | 1991 s->c.bytestream_start= pbbak_start; |
1992 s->c.bytestream= pbbak + p_len; | |
3314 | 1993 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1994 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1995 return score; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1996 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1997 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1998 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
1999 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2000 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2001 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2002 }else{ |
3314 | 2003 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2004 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2005 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2006 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2007 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2008 const int w= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2009 const int rem_depth= s->block_max_depth - level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2010 const int index= (x + y*w) << rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2011 int trx= (x+1)<<rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2012 BlockNode *b= &s->block[index]; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2013 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2014 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2015 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2016 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2017 int pl = left->color[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2018 int pcb= left->color[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2019 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2020 int pmx, pmy; |
3314 | 2021 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 2022 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; |
2023 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2024 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2025 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2026 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2027 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2028 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2029 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2030 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2031 if(level!=s->block_max_depth){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2032 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ |
2995
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
2033 put_rac(&s->c, &s->block_state[4 + s_context], 1); |
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
2034 }else{ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2035 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2036 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2037 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2038 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2039 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2040 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2041 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2042 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2043 if(b->type & BLOCK_INTRA){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2044 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2045 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2046 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2047 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2048 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); |
3314 | 2049 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2050 }else{ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2051 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2052 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); |
3314 | 2053 if(s->ref_frames > 1) |
2054 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2055 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2056 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
3314 | 2057 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2058 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2059 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2060 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2061 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2062 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2063 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2064 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2065 int trx= (x+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2066 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2067 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2068 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2069 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2070 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
2967 | 2071 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2072 if(s->keyframe){ |
3314 | 2073 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2074 return; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2075 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2076 |
2335 | 2077 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
4332 | 2078 int type, mx, my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2079 int l = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2080 int cb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2081 int cr= left->color[2]; |
3314 | 2082 int ref = 0; |
2083 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
4001 | 2084 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); |
2085 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
2967 | 2086 |
2335 | 2087 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2088 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2089 if(type){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2090 pred_mv(s, &mx, &my, 0, left, top, tr); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2091 l += get_symbol(&s->c, &s->block_state[32], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2092 cb+= get_symbol(&s->c, &s->block_state[64], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2093 cr+= get_symbol(&s->c, &s->block_state[96], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2094 }else{ |
3314 | 2095 if(s->ref_frames > 1) |
2096 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2097 pred_mv(s, &mx, &my, ref, left, top, tr); |
3314 | 2098 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); |
2099 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2100 } |
3314 | 2101 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2102 }else{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2103 decode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2104 decode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2105 decode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2106 decode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2107 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2108 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2109 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2110 static void encode_blocks(SnowContext *s, int search){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2111 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2112 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2113 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2114 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2115 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2116 iterative_me(s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2117 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2118 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
2119 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
2422 | 2120 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2121 return; | |
2122 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2123 for(x=0; x<w; x++){ |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2124 if(s->avctx->me_method == ME_ITER || !search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2125 encode_q_branch2(s, 0, x, y); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2126 else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2127 encode_q_branch (s, 0, x, y); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2128 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2129 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2130 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2131 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2132 static void decode_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2133 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2134 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2135 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2136 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2137 for(y=0; y<h; y++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2138 for(x=0; x<w; x++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2139 decode_q_branch(s, 0, x, y); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2140 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2141 } |
2138 | 2142 } |
2143 | |
5254 | 2144 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
2138 | 2145 int x, y; |
2221 | 2146 START_TIMER |
2138 | 2147 for(y=0; y < b_h+5; y++){ |
2148 for(x=0; x < b_w; x++){ | |
2221 | 2149 int a0= src[x ]; |
2150 int a1= src[x + 1]; | |
2151 int a2= src[x + 2]; | |
2152 int a3= src[x + 3]; | |
2153 int a4= src[x + 4]; | |
2154 int a5= src[x + 5]; | |
2138 | 2155 // int am= 9*(a1+a2) - (a0+a3); |
2156 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); | |
2157 // int am= 18*(a2+a3) - 2*(a1+a4); | |
2158 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | |
2159 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3; | |
2160 | |
2161 // if(b_w==16) am= 8*(a1+a2); | |
2162 | |
2610
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2163 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8; |
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2164 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8; |
2967 | 2165 |
2610
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2166 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/ |
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2167 if(am&(~255)) am= ~(am>>31); |
2967 | 2168 |
2610
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2169 tmp[x] = am; |
2138 | 2170 |
2171 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6; | |
2172 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6; | |
2173 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6; | |
2174 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/ | |
2175 } | |
2221 | 2176 tmp += stride; |
2177 src += stride; | |
2138 | 2178 } |
2221 | 2179 tmp -= (b_h+5)*stride; |
2967 | 2180 |
2138 | 2181 for(y=0; y < b_h; y++){ |
2182 for(x=0; x < b_w; x++){ | |
2221 | 2183 int a0= tmp[x + 0*stride]; |
2184 int a1= tmp[x + 1*stride]; | |
2185 int a2= tmp[x + 2*stride]; | |
2186 int a3= tmp[x + 3*stride]; | |
2187 int a4= tmp[x + 4*stride]; | |
2188 int a5= tmp[x + 5*stride]; | |
2138 | 2189 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
2190 // int am= 18*(a2+a3) - 2*(a1+a4); | |
2191 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; | |
2192 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ | |
2967 | 2193 |
2138 | 2194 // if(b_w==16) am= 8*(a1+a2); |
2195 | |
2610
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2196 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8; |
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2197 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8; |
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2198 |
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2199 if(am&(~255)) am= ~(am>>31); |
2967 | 2200 |
2610
f794026f4551
fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net
michael
parents:
2609
diff
changeset
|
2201 dst[x] = am; |
2138 | 2202 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6; |
2203 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6; | |
2204 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6; | |
2205 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/ | |
2206 } | |
2221 | 2207 dst += stride; |
2208 tmp += stride; | |
2138 | 2209 } |
2221 | 2210 STOP_TIMER("mc_block") |
2138 | 2211 } |
2212 | |
2213 #define mca(dx,dy,b_w)\ | |
5254 | 2214 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
2138 | 2215 uint8_t tmp[stride*(b_w+5)];\ |
2216 assert(h==b_w);\ | |
2217 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\ | |
2218 } | |
2219 | |
2220 mca( 0, 0,16) | |
2221 mca( 8, 0,16) | |
2222 mca( 0, 8,16) | |
2223 mca( 8, 8,16) | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2224 mca( 0, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2225 mca( 8, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2226 mca( 0, 8,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2227 mca( 8, 8,8) |
2138 | 2228 |
3314 | 2229 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2230 if(block->type & BLOCK_INTRA){ |
2206 | 2231 int x, y; |
3018 | 2232 const int color = block->color[plane_index]; |
2233 const int color4= color*0x01010101; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2234 if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2235 for(y=0; y < b_h; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2236 *(uint32_t*)&dst[0 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2237 *(uint32_t*)&dst[4 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2238 *(uint32_t*)&dst[8 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2239 *(uint32_t*)&dst[12+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2240 *(uint32_t*)&dst[16+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2241 *(uint32_t*)&dst[20+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2242 *(uint32_t*)&dst[24+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2243 *(uint32_t*)&dst[28+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2244 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2245 }else if(b_w==16){ |
3018 | 2246 for(y=0; y < b_h; y++){ |
2247 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2248 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2249 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2250 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2251 } | |
2252 }else if(b_w==8){ | |
2253 for(y=0; y < b_h; y++){ | |
2254 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2255 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2256 } | |
2257 }else if(b_w==4){ | |
2258 for(y=0; y < b_h; y++){ | |
2259 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2260 } | |
2261 }else{ | |
2262 for(y=0; y < b_h; y++){ | |
2263 for(x=0; x < b_w; x++){ | |
2264 dst[x + y*stride]= color; | |
2265 } | |
2138 | 2266 } |
2267 } | |
2268 }else{ | |
3314 | 2269 uint8_t *src= s->last_picture[block->ref].data[plane_index]; |
2206 | 2270 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; |
2271 int mx= block->mx*scale; | |
2272 int my= block->my*scale; | |
2223 | 2273 const int dx= mx&15; |
2274 const int dy= my&15; | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2275 const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
2206 | 2276 sx += (mx>>4) - 2; |
2277 sy += (my>>4) - 2; | |
2278 src += sx + sy*stride; | |
2279 if( (unsigned)sx >= w - b_w - 4 | |
2280 || (unsigned)sy >= h - b_h - 4){ | |
2281 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h); | |
2282 src= tmp + MB_SIZE; | |
2138 | 2283 } |
3189 | 2284 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2285 // assert(!(b_w&(b_w-1))); | |
3018 | 2286 assert(b_w>1 && b_h>1); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2287 assert(tab_index>=0 && tab_index<4 || b_w==32); |
3189 | 2288 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1))) |
2223 | 2289 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2290 else if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2291 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2292 for(y=0; y<b_h; y+=16){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2293 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2294 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2295 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2296 }else if(b_w==b_h) |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2297 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); |
3018 | 2298 else if(b_w==2*b_h){ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2299 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2300 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); |
3018 | 2301 }else{ |
2302 assert(2*b_w==b_h); | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2303 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride); |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2304 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride); |
3018 | 2305 } |
2138 | 2306 } |
2307 } | |
2308 | |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2309 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2310 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2311 int y, x; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2312 IDWTELEM * dst; |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2313 for(y=0; y<b_h; y++){ |
5409 | 2314 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2315 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2316 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2317 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2318 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2319 dst = slice_buffer_get_line(sb, src_y + y); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2320 for(x=0; x<b_w; x++){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2321 int v= obmc1[x] * block[3][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2322 +obmc2[x] * block[2][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2323 +obmc3[x] * block[1][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2324 +obmc4[x] * block[0][x + y*src_stride]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2325 |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2326 v <<= 8 - LOG2_OBMC_MAX; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2327 if(FRAC_BITS != 8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2328 v >>= 8 - FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2329 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2330 if(add){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2331 v += dst[x + src_x]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2332 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2333 if(v&(~255)) v= ~(v>>31); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2334 dst8[x + y*src_stride] = v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2335 }else{ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2336 dst[x + src_x] -= v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2337 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2338 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2339 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2340 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2341 |
2206 | 2342 //FIXME name clenup (b_w, block_w, b_width stuff) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2343 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2344 const int b_width = s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2345 const int b_height= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2346 const int b_stride= b_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2347 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2348 BlockNode *rt= lt+1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2349 BlockNode *lb= lt+b_stride; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2350 BlockNode *rb= lb+1; |
2967 | 2351 uint8_t *block[4]; |
2842 | 2352 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
2353 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align | |
2354 uint8_t *ptmp; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2355 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2356 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2357 if(b_x<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2358 lt= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2359 lb= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2360 }else if(b_x + 1 >= b_width){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2361 rt= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2362 rb= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2363 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2364 if(b_y<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2365 lt= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2366 rt= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2367 }else if(b_y + 1 >= b_height){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2368 lb= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2369 rb= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2370 } |
2967 | 2371 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2372 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2373 obmc -= src_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2374 b_w += src_x; |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2375 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2376 dst -= src_x; |
2206 | 2377 src_x=0; |
2378 }else if(src_x + b_w > w){ | |
2379 b_w = w - src_x; | |
2380 } | |
2381 if(src_y<0){ | |
2382 obmc -= src_y*obmc_stride; | |
2383 b_h += src_y; | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2384 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2385 dst -= src_y*dst_stride; |
2206 | 2386 src_y=0; |
2387 }else if(src_y + b_h> h){ | |
2388 b_h = h - src_y; | |
2389 } | |
2967 | 2390 |
2206 | 2391 if(b_w<=0 || b_h<=0) return; |
2392 | |
2842 | 2393 assert(src_stride > 2*MB_SIZE + 5); |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2394 if(!sliced && offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2395 dst += src_x + src_y*dst_stride; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2396 dst8+= src_x + src_y*src_stride; |
2206 | 2397 // src += src_x + src_y*src_stride; |
2398 | |
2842 | 2399 ptmp= tmp + 3*tmp_step; |
2400 block[0]= ptmp; | |
2401 ptmp+=tmp_step; | |
3314 | 2402 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
2206 | 2403 |
2404 if(same_block(lt, rt)){ | |
2405 block[1]= block[0]; | |
2406 }else{ | |
2842 | 2407 block[1]= ptmp; |
2408 ptmp+=tmp_step; | |
3314 | 2409 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2206 | 2410 } |
2967 | 2411 |
2206 | 2412 if(same_block(lt, lb)){ |
2413 block[2]= block[0]; | |
2414 }else if(same_block(rt, lb)){ | |
2415 block[2]= block[1]; | |
2416 }else{ | |
2842 | 2417 block[2]= ptmp; |
2418 ptmp+=tmp_step; | |
3314 | 2419 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2206 | 2420 } |
2421 | |
2422 if(same_block(lt, rb) ){ | |
2423 block[3]= block[0]; | |
2424 }else if(same_block(rt, rb)){ | |
2425 block[3]= block[1]; | |
2426 }else if(same_block(lb, rb)){ | |
2427 block[3]= block[2]; | |
2428 }else{ | |
2842 | 2429 block[3]= ptmp; |
3314 | 2430 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2206 | 2431 } |
2432 #if 0 | |
2433 for(y=0; y<b_h; y++){ | |
2434 for(x=0; x<b_w; x++){ | |
2435 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2436 if(add) dst[x + y*dst_stride] += v; | |
2437 else dst[x + y*dst_stride] -= v; | |
2438 } | |
2439 } | |
2440 for(y=0; y<b_h; y++){ | |
2441 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2442 for(x=0; x<b_w; x++){ | |
2443 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2444 if(add) dst[x + y*dst_stride] += v; | |
2445 else dst[x + y*dst_stride] -= v; | |
2446 } | |
2447 } | |
2448 for(y=0; y<b_h; y++){ | |
2449 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2450 for(x=0; x<b_w; x++){ | |
2451 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2452 if(add) dst[x + y*dst_stride] += v; | |
2453 else dst[x + y*dst_stride] -= v; | |
2454 } | |
2455 } | |
2456 for(y=0; y<b_h; y++){ | |
2457 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2458 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2459 for(x=0; x<b_w; x++){ | |
2460 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2461 if(add) dst[x + y*dst_stride] += v; | |
2462 else dst[x + y*dst_stride] -= v; | |
2463 } | |
2464 } | |
2465 #else | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2466 if(sliced){ |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2467 START_TIMER |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2468 |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2469 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2470 STOP_TIMER("inner_add_yblock") |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2471 }else |
2206 | 2472 for(y=0; y<b_h; y++){ |
5409 | 2473 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2474 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2475 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2476 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2477 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
2206 | 2478 for(x=0; x<b_w; x++){ |
2479 int v= obmc1[x] * block[3][x + y*src_stride] | |
2480 +obmc2[x] * block[2][x + y*src_stride] | |
2481 +obmc3[x] * block[1][x + y*src_stride] | |
2482 +obmc4[x] * block[0][x + y*src_stride]; | |
2967 | 2483 |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2484 v <<= 8 - LOG2_OBMC_MAX; |
2246 | 2485 if(FRAC_BITS != 8){ |
2486 v >>= 8 - FRAC_BITS; | |
2487 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2488 if(add){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2489 v += dst[x + y*dst_stride]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2490 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2491 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2492 dst8[x + y*src_stride] = v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2493 }else{ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2494 dst[x + y*dst_stride] -= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2495 } |
2206 | 2496 } |
2497 } | |
2498 #endif | |
2499 } | |
2500 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2501 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2502 Plane *p= &s->plane[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2503 const int mb_w= s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2504 const int mb_h= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2505 int x, y, mb_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2506 int block_size = MB_SIZE >> s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2507 int block_w = plane_index ? block_size/2 : block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2508 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2509 int obmc_stride= plane_index ? block_size : 2*block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2510 int ref_stride= s->current_picture.linesize[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2511 uint8_t *dst8= s->current_picture.data[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2512 int w= p->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2513 int h= p->height; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2514 START_TIMER |
2967 | 2515 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2516 if(s->keyframe || (s->avctx->debug&512)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2517 if(mb_y==mb_h) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2518 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2519 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2520 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2521 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2522 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2523 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2524 IDWTELEM * line = sb->line[y]; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2525 for(x=0; x<w; x++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2526 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2527 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2528 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2529 v >>= FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2530 if(v&(~255)) v= ~(v>>31); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2531 dst8[x + y*ref_stride]= v; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2532 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2533 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2534 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2535 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2536 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2537 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2538 IDWTELEM * line = sb->line[y]; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2539 for(x=0; x<w; x++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2540 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2541 line[x] -= 128 << FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2542 // buf[x + y*w]-= 128<<FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2543 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2544 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2545 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2546 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2547 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2548 } |
2967 | 2549 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2550 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2551 START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2552 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2553 add_yblock(s, 1, sb, old_buffer, dst8, obmc, |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2554 block_w*mb_x - block_w/2, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2555 block_w*mb_y - block_w/2, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2556 block_w, block_w, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2557 w, h, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2558 w, ref_stride, obmc_stride, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2559 mb_x - 1, mb_y - 1, |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2560 add, 0, plane_index); |
2967 | 2561 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2562 STOP_TIMER("add_yblock") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2563 } |
2967 | 2564 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2565 STOP_TIMER("predict_slice") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2566 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2567 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2568 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2138 | 2569 Plane *p= &s->plane[plane_index]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2570 const int mb_w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2571 const int mb_h= s->b_height << s->block_max_depth; |
2562 | 2572 int x, y, mb_x; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2573 int block_size = MB_SIZE >> s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2574 int block_w = plane_index ? block_size/2 : block_size; |
2206 | 2575 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2576 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2577 int ref_stride= s->current_picture.linesize[plane_index]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2578 uint8_t *dst8= s->current_picture.data[plane_index]; |
2138 | 2579 int w= p->width; |
2580 int h= p->height; | |
2197 | 2581 START_TIMER |
2967 | 2582 |
2206 | 2583 if(s->keyframe || (s->avctx->debug&512)){ |
2562 | 2584 if(mb_y==mb_h) |
2585 return; | |
2586 | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2587 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2588 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2589 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2590 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2591 v >>= FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2592 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2593 dst8[x + y*ref_stride]= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2594 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2595 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2596 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2597 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2598 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2599 buf[x + y*w]-= 128<<FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2600 } |
2206 | 2601 } |
2138 | 2602 } |
2206 | 2603 |
2604 return; | |
2138 | 2605 } |
2967 | 2606 |
2206 | 2607 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2197 | 2608 START_TIMER |
2206 | 2609 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2610 add_yblock(s, 0, NULL, buf, dst8, obmc, |
2206 | 2611 block_w*mb_x - block_w/2, |
2138 | 2612 block_w*mb_y - block_w/2, |
2206 | 2613 block_w, block_w, |
2138 | 2614 w, h, |
2206 | 2615 w, ref_stride, obmc_stride, |
2616 mb_x - 1, mb_y - 1, | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2617 add, 1, plane_index); |
2967 | 2618 |
2206 | 2619 STOP_TIMER("add_yblock") |
2138 | 2620 } |
2967 | 2621 |
2562 | 2622 STOP_TIMER("predict_slice") |
2623 } | |
2624 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2625 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2562 | 2626 const int mb_h= s->b_height << s->block_max_depth; |
2627 int mb_y; | |
2628 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2629 predict_slice(s, buf, plane_index, add, mb_y); | |
2138 | 2630 } |
2631 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2632 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2633 int i, x2, y2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2634 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2635 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2636 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2637 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2638 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2639 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2640 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2641 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2642 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2643 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2644 const int h= p->height; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2645 int index= mb_x + mb_y*b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2646 BlockNode *b= &s->block[index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2647 BlockNode backup= *b; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2648 int ab=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2649 int aa=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2650 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2651 b->type|= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2652 b->color[plane_index]= 0; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2653 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2654 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2655 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2656 int mb_x2= mb_x + (i &1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2657 int mb_y2= mb_y + (i>>1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2658 int x= block_w*mb_x2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2659 int y= block_w*mb_y2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2660 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2661 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2662 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2663 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2664 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2665 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2666 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2667 int obmc_v= obmc[index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2668 int d; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2669 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2670 if(x<0) obmc_v += obmc[index + block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2671 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2672 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2673 //FIXME precalc this or simplify it somehow else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2674 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2675 d = -dst[index] + (1<<(FRAC_BITS-1)); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2676 dst[index] = d; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2677 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2678 aa += obmc_v * obmc_v; //FIXME precalclate this |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2679 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2680 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2681 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2682 *b= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2683 |
5127 | 2684 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2685 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2686 |
3051 | 2687 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
2688 const int b_stride = s->b_width << s->block_max_depth; | |
2689 const int b_height = s->b_height<< s->block_max_depth; | |
2690 int index= x + y*b_stride; | |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2691 const BlockNode *b = &s->block[index]; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2692 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2693 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2694 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2695 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; |
3051 | 2696 int dmx, dmy; |
4001 | 2697 // int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
2698 // int my_context= av_log2(2*FFABS(left->my - top->my)); | |
3051 | 2699 |
2700 if(x<0 || x>=b_stride || y>=b_height) | |
2701 return 0; | |
2702 /* | |
2703 1 0 0 | |
2704 01X 1-2 1 | |
2705 001XX 3-6 2-3 | |
2706 0001XXX 7-14 4-7 | |
2707 00001XXXX 15-30 8-15 | |
2708 */ | |
2709 //FIXME try accurate rate | |
2710 //FIXME intra and inter predictors if surrounding blocks arent the same type | |
2711 if(b->type & BLOCK_INTRA){ | |
4001 | 2712 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) |
2713 + av_log2(2*FFABS(left->color[1] - b->color[1])) | |
2714 + av_log2(2*FFABS(left->color[2] - b->color[2]))); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2715 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2716 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2717 dmx-= b->mx; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2718 dmy-= b->my; |
4001 | 2719 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda |
2720 + av_log2(2*FFABS(dmy)) | |
3314 | 2721 + av_log2(2*b->ref)); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2722 } |
3051 | 2723 } |
2724 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2725 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2726 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2727 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2728 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2729 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2730 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2731 uint8_t *dst= s->current_picture.data[plane_index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2732 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2733 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2734 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2735 uint8_t tmp[ref_stride*(2*MB_SIZE+5)]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2736 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2737 const int b_height = s->b_height<< s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2738 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2739 const int h= p->height; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2740 int distortion; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2741 int rate= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2742 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2743 int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2744 int sy= block_w*mb_y - block_w/2; |
3206 | 2745 int x0= FFMAX(0,-sx); |
2746 int y0= FFMAX(0,-sy); | |
2747 int x1= FFMIN(block_w*2, w-sx); | |
2748 int y1= FFMIN(block_w*2, h-sy); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2749 int i,x,y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2750 |
3314 | 2751 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2752 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2753 for(y=y0; y<y1; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2754 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2755 const IDWTELEM *pred1 = pred + y*obmc_stride; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2756 uint8_t *cur1 = cur + y*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2757 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2758 for(x=x0; x<x1; x++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2759 #if FRAC_BITS >= LOG2_OBMC_MAX |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2760 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2761 #else |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2762 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2763 #endif |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2764 v = (v + pred1[x]) >> FRAC_BITS; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2765 if(v&(~255)) v= ~(v>>31); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2766 dst1[x] = v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2767 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2768 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2769 |
3206 | 2770 /* copy the regions where obmc[] = (uint8_t)256 */ |
2771 if(LOG2_OBMC_MAX == 8 | |
2772 && (mb_x == 0 || mb_x == b_stride-1) | |
2773 && (mb_y == 0 || mb_y == b_height-1)){ | |
2774 if(mb_x == 0) | |
2775 x1 = block_w; | |
2776 else | |
2777 x0 = block_w; | |
2778 if(mb_y == 0) | |
2779 y1 = block_w; | |
2780 else | |
2781 y0 = block_w; | |
2782 for(y=y0; y<y1; y++) | |
2783 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); | |
2784 } | |
2785 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2786 if(block_w==16){ |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2787 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2788 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2789 /* FIXME cmps overlap but don't cover the wavelet's whole support, |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2790 * so improving the score of one block is not strictly guaranteed to |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2791 * improve the score of the whole frame, so iterative motion est |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2792 * doesn't always converge. */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2793 if(s->avctx->me_cmp == FF_CMP_W97) |
4197 | 2794 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2795 else if(s->avctx->me_cmp == FF_CMP_W53) |
4197 | 2796 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2797 else{ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2798 distortion = 0; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2799 for(i=0; i<4; i++){ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2800 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2801 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2802 } |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2803 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2804 }else{ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2805 assert(block_w==8); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2806 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2807 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2808 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2809 if(plane_index==0){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2810 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2811 /* ..RRr |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2812 * .RXx. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2813 * rxx.. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2814 */ |
3051 | 2815 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2816 } |
3057 | 2817 if(mb_x == b_stride-2) |
2818 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2819 } |
3051 | 2820 return distortion + rate*penalty_factor; |
2821 } | |
2822 | |
2823 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
2824 int i, y2; | |
2825 Plane *p= &s->plane[plane_index]; | |
2826 const int block_size = MB_SIZE >> s->block_max_depth; | |
2827 const int block_w = plane_index ? block_size/2 : block_size; | |
2828 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2829 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2830 const int ref_stride= s->current_picture.linesize[plane_index]; | |
2831 uint8_t *dst= s->current_picture.data[plane_index]; | |
2832 uint8_t *src= s-> input_picture.data[plane_index]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2833 static const IDWTELEM zero_dst[4096]; //FIXME |
3051 | 2834 const int b_stride = s->b_width << s->block_max_depth; |
2835 const int w= p->width; | |
2836 const int h= p->height; | |
2837 int distortion= 0; | |
2838 int rate= 0; | |
2839 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
2840 | |
2841 for(i=0; i<9; i++){ | |
2842 int mb_x2= mb_x + (i%3) - 1; | |
2843 int mb_y2= mb_y + (i/3) - 1; | |
2844 int x= block_w*mb_x2 + block_w/2; | |
2845 int y= block_w*mb_y2 + block_w/2; | |
2846 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2847 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
3051 | 2848 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
2849 | |
2850 //FIXME find a cleaner/simpler way to skip the outside stuff | |
2851 for(y2= y; y2<0; y2++) | |
2852 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2853 for(y2= h; y2<y+block_w; y2++) | |
2854 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2855 if(x<0){ | |
2856 for(y2= y; y2<y+block_w; y2++) | |
2857 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
2858 } | |
2859 if(x+block_w > w){ | |
2860 for(y2= y; y2<y+block_w; y2++) | |
2861 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
2862 } | |
2863 | |
2864 assert(block_w== 8 || block_w==16); | |
2865 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
2866 } | |
2867 | |
2868 if(plane_index==0){ | |
2869 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
2870 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
2871 | |
2872 /* ..RRRr | |
2873 * .RXXx. | |
2874 * .RXXx. | |
2875 * rxxx. | |
2876 */ | |
2877 if(merged) | |
2878 rate = get_block_bits(s, mb_x, mb_y, 2); | |
2879 for(i=merged?4:0; i<9; i++){ | |
2880 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
2881 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
2882 } | |
2883 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2884 return distortion + rate*penalty_factor; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2885 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2886 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2887 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2888 const int b_stride= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2889 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2890 BlockNode backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2891 int rd, index, value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2892 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2893 assert(mb_x>=0 && mb_y>=0); |
2994 | 2894 assert(mb_x<b_stride); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2895 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2896 if(intra){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2897 block->color[0] = p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2898 block->color[1] = p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2899 block->color[2] = p[2]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2900 block->type |= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2901 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2902 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); |
3314 | 2903 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2904 if(s->me_cache[index] == value) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2905 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2906 s->me_cache[index]= value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2907 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2908 block->mx= p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2909 block->my= p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2910 block->type &= ~BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2911 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2912 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2913 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2914 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2915 //FIXME chroma |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2916 if(rd < *best_rd){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2917 *best_rd= rd; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2918 return 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2919 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2920 *block= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2921 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2922 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2923 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2924 |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2925 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2926 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2927 int p[2] = {p0, p1}; |
3197 | 2928 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2929 } |
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2930 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2931 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ |
3051 | 2932 const int b_stride= s->b_width << s->block_max_depth; |
2933 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
2934 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
2935 int rd, index, value; | |
2936 | |
2937 assert(mb_x>=0 && mb_y>=0); | |
2938 assert(mb_x<b_stride); | |
2939 assert(((mb_x|mb_y)&1) == 0); | |
2940 | |
2941 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
3314 | 2942 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); |
3051 | 2943 if(s->me_cache[index] == value) |
2944 return 0; | |
2945 s->me_cache[index]= value; | |
2946 | |
2947 block->mx= p0; | |
2948 block->my= p1; | |
3314 | 2949 block->ref= ref; |
3051 | 2950 block->type &= ~BLOCK_INTRA; |
2951 block[1]= block[b_stride]= block[b_stride+1]= *block; | |
2952 | |
2953 rd= get_4block_rd(s, mb_x, mb_y, 0); | |
2954 | |
2955 //FIXME chroma | |
2956 if(rd < *best_rd){ | |
2957 *best_rd= rd; | |
2958 return 1; | |
2959 }else{ | |
2960 block[0]= backup[0]; | |
2961 block[1]= backup[1]; | |
2962 block[b_stride]= backup[2]; | |
2963 block[b_stride+1]= backup[3]; | |
2964 return 0; | |
2965 } | |
2966 } | |
2967 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2968 static void iterative_me(SnowContext *s){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2969 int pass, mb_x, mb_y; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2970 const int b_width = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2971 const int b_height= s->b_height << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2972 const int b_stride= b_width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2973 int color[3]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2974 |
3194
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2975 { |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2976 RangeCoder r = s->c; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2977 uint8_t state[sizeof(s->block_state)]; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2978 memcpy(state, s->block_state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2979 for(mb_y= 0; mb_y<s->b_height; mb_y++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2980 for(mb_x= 0; mb_x<s->b_width; mb_x++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2981 encode_q_branch(s, 0, mb_x, mb_y); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2982 s->c = r; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2983 memcpy(s->block_state, state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2984 } |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
2985 |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2986 for(pass=0; pass<25; pass++){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2987 int change= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2988 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2989 for(mb_y= 0; mb_y<b_height; mb_y++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2990 for(mb_x= 0; mb_x<b_width; mb_x++){ |
3314 | 2991 int dia_change, i, j, ref; |
2992 int best_rd= INT_MAX, ref_rd; | |
2993 BlockNode backup, ref_b; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2994 const int index= mb_x + mb_y * b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2995 BlockNode *block= &s->block[index]; |
3324 | 2996 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; |
2997 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; | |
2998 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; | |
2999 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; | |
3000 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; | |
3001 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; | |
3002 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; | |
3003 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3004 const int b_w= (MB_SIZE >> s->block_max_depth); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3005 uint8_t obmc_edged[b_w*2][b_w*2]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3006 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3007 if(pass && (block->type & BLOCK_OPT)) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3008 continue; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3009 block->type |= BLOCK_OPT; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3010 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3011 backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3012 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3013 if(!s->me_cache_generation) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3014 memset(s->me_cache, 0, sizeof(s->me_cache)); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3015 s->me_cache_generation += 1<<22; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3016 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3017 //FIXME precalc |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3018 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3019 int x, y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3020 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3021 if(mb_x==0) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3022 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3023 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3024 if(mb_x==b_stride-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3025 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3026 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3027 if(mb_y==0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3028 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3029 obmc_edged[0][x] += obmc_edged[b_w-1][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3030 for(y=1; y<b_w; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3031 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3032 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3033 if(mb_y==b_height-1){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3034 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3035 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3036 for(y=b_w; y<b_w*2-1; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3037 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3038 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3039 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3040 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3041 //skip stuff outside the picture |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3042 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3043 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3044 uint8_t *src= s-> input_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3045 uint8_t *dst= s->current_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3046 const int stride= s->current_picture.linesize[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3047 const int block_w= MB_SIZE >> s->block_max_depth; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3048 const int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3049 const int sy= block_w*mb_y - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3050 const int w= s->plane[0].width; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3051 const int h= s->plane[0].height; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3052 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3053 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3054 for(y=sy; y<0; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3055 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3056 for(y=h; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3057 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3058 if(sx<0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3059 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3060 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3061 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3062 if(sx+block_w*2 > w){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3063 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3064 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3065 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3066 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3067 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3068 // intra(black) = neighbors' contribution to the current block |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3069 for(i=0; i<3; i++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3070 color[i]= get_dc(s, mb_x, mb_y, i); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3071 |
5127 | 3072 // get previous score (cannot be cached due to OBMC) |
3057 | 3073 if(pass > 0 && (block->type&BLOCK_INTRA)){ |
3074 int color0[3]= {block->color[0], block->color[1], block->color[2]}; | |
3075 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); | |
3076 }else | |
3197 | 3077 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); |
3078 | |
3314 | 3079 ref_b= *block; |
3080 ref_rd= best_rd; | |
3081 for(ref=0; ref < s->ref_frames; ref++){ | |
3082 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; | |
3083 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold | |
3084 continue; | |
3085 block->ref= ref; | |
3086 best_rd= INT_MAX; | |
3087 | |
3088 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); | |
3089 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); | |
3324 | 3090 if(tb) |
3314 | 3091 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); |
3324 | 3092 if(lb) |
3314 | 3093 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); |
3324 | 3094 if(rb) |
3314 | 3095 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); |
3324 | 3096 if(bb) |
3314 | 3097 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); |
3098 | |
3099 /* fullpel ME */ | |
3100 //FIXME avoid subpel interpol / round to nearest integer | |
3101 do{ | |
3102 dia_change=0; | |
3103 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3104 for(j=0; j<i; j++){ | |
3105 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3106 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3107 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3108 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3109 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3110 } |
3314 | 3111 }while(dia_change); |
3112 /* subpel ME */ | |
3113 do{ | |
3114 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3115 dia_change=0; | |
3116 for(i=0; i<8; i++) | |
3117 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); | |
3118 }while(dia_change); | |
3119 //FIXME or try the standard 2 pass qpel or similar | |
3120 | |
3121 mvr[0][0]= block->mx; | |
3122 mvr[0][1]= block->my; | |
3123 if(ref_rd > best_rd){ | |
3124 ref_rd= best_rd; | |
3125 ref_b= *block; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3126 } |
3314 | 3127 } |
3128 best_rd= ref_rd; | |
3129 *block= ref_b; | |
2998 | 3130 #if 1 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3131 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3132 //FIXME RD style color selection |
2998 | 3133 #endif |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3134 if(!same_block(block, &backup)){ |
3324 | 3135 if(tb ) tb ->type &= ~BLOCK_OPT; |
3136 if(lb ) lb ->type &= ~BLOCK_OPT; | |
3137 if(rb ) rb ->type &= ~BLOCK_OPT; | |
3138 if(bb ) bb ->type &= ~BLOCK_OPT; | |
3139 if(tlb) tlb->type &= ~BLOCK_OPT; | |
3140 if(trb) trb->type &= ~BLOCK_OPT; | |
3141 if(blb) blb->type &= ~BLOCK_OPT; | |
3142 if(brb) brb->type &= ~BLOCK_OPT; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3143 change ++; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3144 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3145 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3146 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3147 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3148 if(!change) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3149 break; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3150 } |
3051 | 3151 |
3152 if(s->block_max_depth == 1){ | |
3153 int change= 0; | |
3154 for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
3155 for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
3324 | 3156 int i; |
3051 | 3157 int best_rd, init_rd; |
3158 const int index= mb_x + mb_y * b_stride; | |
3159 BlockNode *b[4]; | |
3160 | |
3161 b[0]= &s->block[index]; | |
3162 b[1]= b[0]+1; | |
3163 b[2]= b[0]+b_stride; | |
3164 b[3]= b[2]+1; | |
3165 if(same_block(b[0], b[1]) && | |
3166 same_block(b[0], b[2]) && | |
3167 same_block(b[0], b[3])) | |
3168 continue; | |
3169 | |
3170 if(!s->me_cache_generation) | |
3171 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3172 s->me_cache_generation += 1<<22; | |
3173 | |
3174 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3175 | |
3314 | 3176 //FIXME more multiref search? |
3051 | 3177 check_4block_inter(s, mb_x, mb_y, |
3178 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
3314 | 3179 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); |
3051 | 3180 |
3181 for(i=0; i<4; i++) | |
3182 if(!(b[i]->type&BLOCK_INTRA)) | |
3314 | 3183 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); |
3051 | 3184 |
3185 if(init_rd != best_rd) | |
3186 change++; | |
3187 } | |
3188 } | |
3189 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
3190 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3191 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3192 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3193 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
2138 | 3194 const int level= b->level; |
3195 const int w= b->width; | |
3196 const int h= b->height; | |
4594 | 3197 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
5575 | 3198 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
2150 | 3199 int x,y, thres1, thres2; |
2893 | 3200 // START_TIMER |
2138 | 3201 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3202 if(s->qlog == LOSSLESS_QLOG){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3203 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3204 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3205 dst[x + y*stride]= src[x + y*stride]; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3206 return; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3207 } |
2967 | 3208 |
2138 | 3209 bias= bias ? 0 : (3*qmul)>>3; |
2150 | 3210 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3211 thres2= 2*thres1; | |
2967 | 3212 |
2138 | 3213 if(!bias){ |
3214 for(y=0; y<h; y++){ | |
3215 for(x=0; x<w; x++){ | |
2150 | 3216 int i= src[x + y*stride]; |
2967 | 3217 |
2150 | 3218 if((unsigned)(i+thres1) > thres2){ |
3219 if(i>=0){ | |
3220 i<<= QEXPSHIFT; | |
3221 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3222 dst[x + y*stride]= i; |
2150 | 3223 }else{ |
3224 i= -i; | |
3225 i<<= QEXPSHIFT; | |
3226 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3227 dst[x + y*stride]= -i; |
2150 | 3228 } |
3229 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3230 dst[x + y*stride]= 0; |
2138 | 3231 } |
3232 } | |
3233 }else{ | |
3234 for(y=0; y<h; y++){ | |
3235 for(x=0; x<w; x++){ | |
2967 | 3236 int i= src[x + y*stride]; |
3237 | |
2150 | 3238 if((unsigned)(i+thres1) > thres2){ |
3239 if(i>=0){ | |
3240 i<<= QEXPSHIFT; | |
3241 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3242 dst[x + y*stride]= i; |
2150 | 3243 }else{ |
3244 i= -i; | |
3245 i<<= QEXPSHIFT; | |
3246 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3247 dst[x + y*stride]= -i; |
2150 | 3248 } |
3249 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3250 dst[x + y*stride]= 0; |
2138 | 3251 } |
3252 } | |
3253 } | |
2150 | 3254 if(level+1 == s->spatial_decomposition_count){ |
3255 // STOP_TIMER("quantize") | |
3256 } | |
2138 | 3257 } |
3258 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3259 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3260 const int w= b->width; |
4594 | 3261 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3262 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3263 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3264 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3265 START_TIMER |
2967 | 3266 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3267 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3268 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3269 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3270 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3271 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3272 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3273 int i= line[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3274 if(i<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3275 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3276 }else if(i>0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3277 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3278 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3279 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3280 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3281 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3282 STOP_TIMER("dquant") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3283 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3284 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3285 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3286 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
2138 | 3287 const int w= b->width; |
3288 const int h= b->height; | |
4594 | 3289 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3290 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2138 | 3291 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3292 int x,y; | |
2195 | 3293 START_TIMER |
2967 | 3294 |
2161 | 3295 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3296 |
2138 | 3297 for(y=0; y<h; y++){ |
3298 for(x=0; x<w; x++){ | |
3299 int i= src[x + y*stride]; | |
3300 if(i<0){ | |
3301 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
3302 }else if(i>0){ | |
3303 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
3304 } | |
3305 } | |
3306 } | |
2195 | 3307 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ |
3308 STOP_TIMER("dquant") | |
3309 } | |
2138 | 3310 } |
3311 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3312 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3313 const int w= b->width; |
3314 const int h= b->height; | |
3315 int x,y; | |
2967 | 3316 |
2138 | 3317 for(y=h-1; y>=0; y--){ |
3318 for(x=w-1; x>=0; x--){ | |
3319 int i= x + y*stride; | |
2967 | 3320 |
2138 | 3321 if(x){ |
3322 if(use_median){ | |
3323 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3324 else src[i] -= src[i - 1]; | |
3325 }else{ | |
3326 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3327 else src[i] -= src[i - 1]; | |
3328 } | |
3329 }else{ | |
3330 if(y) src[i] -= src[i - stride]; | |
3331 } | |
3332 } | |
3333 } | |
3334 } | |
3335 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3336 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3337 const int w= b->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3338 int x,y; |
2967 | 3339 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3340 // START_TIMER |
2967 | 3341 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3342 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3343 IDWTELEM * prev; |
2967 | 3344 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3345 if (start_y != 0) |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3346 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2967 | 3347 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3348 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3349 prev = line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3350 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3351 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3352 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3353 if(x){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3354 if(use_median){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3355 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3356 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3357 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3358 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3359 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3360 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3361 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3362 if(y) line[x] += prev[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3363 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3364 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3365 } |
2967 | 3366 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3367 // STOP_TIMER("correlate") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3368 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3369 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3370 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3371 const int w= b->width; |
3372 const int h= b->height; | |
3373 int x,y; | |
2967 | 3374 |
2138 | 3375 for(y=0; y<h; y++){ |
3376 for(x=0; x<w; x++){ | |
3377 int i= x + y*stride; | |
2967 | 3378 |
2138 | 3379 if(x){ |
3380 if(use_median){ | |
3381 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3382 else src[i] += src[i - 1]; | |
3383 }else{ | |
3384 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3385 else src[i] += src[i - 1]; | |
3386 } | |
3387 }else{ | |
3388 if(y) src[i] += src[i - stride]; | |
3389 } | |
3390 } | |
3391 } | |
3392 } | |
3393 | |
3394 static void encode_header(SnowContext *s){ | |
3395 int plane_index, level, orientation; | |
2967 | 3396 uint8_t kstate[32]; |
3397 | |
3398 memset(kstate, MID_STATE, sizeof(kstate)); | |
2138 | 3399 |
2335 | 3400 put_rac(&s->c, kstate, s->keyframe); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3401 if(s->keyframe || s->always_reset){ |
2199 | 3402 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3403 s->last_spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3404 s->last_qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3405 s->last_qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3406 s->last_mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3407 s->last_block_max_depth= 0; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3408 } |
2138 | 3409 if(s->keyframe){ |
3410 put_symbol(&s->c, s->header_state, s->version, 0); | |
2335 | 3411 put_rac(&s->c, s->header_state, s->always_reset); |
2138 | 3412 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
3413 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
3414 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
3415 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
3416 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); | |
3417 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
2335 | 3418 put_rac(&s->c, s->header_state, s->spatial_scalability); |
3419 // put_rac(&s->c, s->header_state, s->rate_scalability); | |
3314 | 3420 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); |
2138 | 3421 |
3422 for(plane_index=0; plane_index<2; plane_index++){ | |
3423 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3424 for(orientation=level ? 1:0; orientation<4; orientation++){ | |
3425 if(orientation==2) continue; | |
3426 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); | |
3427 } | |
3428 } | |
3429 } | |
3430 } | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3431 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3432 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3433 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3434 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3435 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3436 |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3437 s->last_spatial_decomposition_type= s->spatial_decomposition_type; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3438 s->last_qlog = s->qlog; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3439 s->last_qbias = s->qbias; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3440 s->last_mv_scale = s->mv_scale; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3441 s->last_block_max_depth = s->block_max_depth; |
2138 | 3442 } |
3443 | |
3444 static int decode_header(SnowContext *s){ | |
3445 int plane_index, level, orientation; | |
2335 | 3446 uint8_t kstate[32]; |
2138 | 3447 |
2967 | 3448 memset(kstate, MID_STATE, sizeof(kstate)); |
2335 | 3449 |
3450 s->keyframe= get_rac(&s->c, kstate); | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3451 if(s->keyframe || s->always_reset){ |
2199 | 3452 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3453 s->spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3454 s->qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3455 s->qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3456 s->mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3457 s->block_max_depth= 0; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3458 } |
2138 | 3459 if(s->keyframe){ |
3460 s->version= get_symbol(&s->c, s->header_state, 0); | |
3461 if(s->version>0){ | |
3462 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version); | |
3463 return -1; | |
3464 } | |
2335 | 3465 s->always_reset= get_rac(&s->c, s->header_state); |
2138 | 3466 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
3467 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3468 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3469 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); | |
3470 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
3471 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
2335 | 3472 s->spatial_scalability= get_rac(&s->c, s->header_state); |
3473 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
3314 | 3474 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1; |
2138 | 3475 |
3476 for(plane_index=0; plane_index<3; plane_index++){ | |
3477 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3478 for(orientation=level ? 1:0; orientation<4; orientation++){ | |
3479 int q; | |
3480 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; | |
3481 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; | |
3482 else q= get_symbol(&s->c, s->header_state, 1); | |
3483 s->plane[plane_index].band[level][orientation].qlog= q; | |
3484 } | |
3485 } | |
3486 } | |
3487 } | |
2967 | 3488 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3489 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); |
5588
effa59ca89b3
we only have 2 wavelets, the 3rd was just for experimentation ...
michael
parents:
5587
diff
changeset
|
3490 if(s->spatial_decomposition_type > 1){ |
2138 | 3491 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); |
3492 return -1; | |
3493 } | |
2967 | 3494 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3495 s->qlog += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3496 s->mv_scale += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3497 s->qbias += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3498 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); |
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3206
diff
changeset
|
3499 if(s->block_max_depth > 1 || s->block_max_depth < 0){ |
2952 | 3500 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); |
3501 s->block_max_depth= 0; | |
3502 return -1; | |
3503 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3504 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3505 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3506 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3507 |
3075 | 3508 static void init_qexp(void){ |
2600 | 3509 int i; |
3510 double v=128; | |
3511 | |
3512 for(i=0; i<QROOT; i++){ | |
3513 qexp[i]= lrintf(v); | |
2967 | 3514 v *= pow(2, 1.0 / QROOT); |
2600 | 3515 } |
3516 } | |
3517 | |
2138 | 3518 static int common_init(AVCodecContext *avctx){ |
3519 SnowContext *s = avctx->priv_data; | |
3520 int width, height; | |
3521 int level, orientation, plane_index, dec; | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3522 int i, j; |
2138 | 3523 |
3524 s->avctx= avctx; | |
2967 | 3525 |
2138 | 3526 dsputil_init(&s->dsp, avctx); |
3527 | |
3528 #define mcf(dx,dy)\ | |
3529 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3530 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3531 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3532 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3533 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3534 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
2138 | 3535 |
3536 mcf( 0, 0) | |
3537 mcf( 4, 0) | |
3538 mcf( 8, 0) | |
3539 mcf(12, 0) | |
3540 mcf( 0, 4) | |
3541 mcf( 4, 4) | |
3542 mcf( 8, 4) | |
3543 mcf(12, 4) | |
3544 mcf( 0, 8) | |
3545 mcf( 4, 8) | |
3546 mcf( 8, 8) | |
3547 mcf(12, 8) | |
3548 mcf( 0,12) | |
3549 mcf( 4,12) | |
3550 mcf( 8,12) | |
3551 mcf(12,12) | |
3552 | |
3553 #define mcfh(dx,dy)\ | |
3554 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3555 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3556 mc_block_hpel ## dx ## dy ## 16;\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3557 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3558 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3559 mc_block_hpel ## dx ## dy ## 8; |
2138 | 3560 |
3561 mcfh(0, 0) | |
3562 mcfh(8, 0) | |
3563 mcfh(0, 8) | |
3564 mcfh(8, 8) | |
2600 | 3565 |
3566 if(!qexp[0]) | |
3567 init_qexp(); | |
3568 | |
2138 | 3569 dec= s->spatial_decomposition_count= 5; |
3570 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type | |
2967 | 3571 |
2138 | 3572 s->chroma_h_shift= 1; //FIXME XXX |
3573 s->chroma_v_shift= 1; | |
2967 | 3574 |
2138 | 3575 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
2967 | 3576 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3577 width= s->avctx->width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3578 height= s->avctx->height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3579 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3580 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3581 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here |
2967 | 3582 |
2138 | 3583 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3584 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0; |
2967 | 3585 |
3586 for(plane_index=0; plane_index<3; plane_index++){ | |
2138 | 3587 int w= s->avctx->width; |
3588 int h= s->avctx->height; | |
3589 | |
3590 if(plane_index){ | |
3591 w>>= s->chroma_h_shift; | |
3592 h>>= s->chroma_v_shift; | |
3593 } | |
3594 s->plane[plane_index].width = w; | |
3595 s->plane[plane_index].height= h; | |
2160 | 3596 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h); |
2138 | 3597 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
3598 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3599 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 3600 |
2138 | 3601 b->buf= s->spatial_dwt_buffer; |
3602 b->level= level; | |
3603 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3604 b->width = (w + !(orientation&1))>>1; | |
3605 b->height= (h + !(orientation>1))>>1; | |
2967 | 3606 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3607 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3608 b->buf_x_offset = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3609 b->buf_y_offset = 0; |
2967 | 3610 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3611 if(orientation&1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3612 b->buf += (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3613 b->buf_x_offset = (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3614 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3615 if(orientation>1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3616 b->buf += b->stride>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3617 b->buf_y_offset = b->stride_line >> 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3618 } |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3619 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
2967 | 3620 |
2138 | 3621 if(level) |
3622 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3623 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
2138 | 3624 } |
3625 w= (w+1)>>1; | |
3626 h= (h+1)>>1; | |
3627 } | |
3628 } | |
2967 | 3629 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3630 for(i=0; i<MAX_REF_FRAMES; i++) |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3631 for(j=0; j<MAX_REF_FRAMES; j++) |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3632 scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3633 |
2138 | 3634 reset_contexts(s); |
2967 | 3635 /* |
2138 | 3636 width= s->width= avctx->width; |
3637 height= s->height= avctx->height; | |
2967 | 3638 |
2138 | 3639 assert(width && height); |
3640 */ | |
3641 s->avctx->get_buffer(s->avctx, &s->mconly_picture); | |
2967 | 3642 |
2138 | 3643 return 0; |
3644 } | |
3645 | |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3646 static int qscale2qlog(int qscale){ |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3647 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3648 + 61*QROOT/8; //<64 >60 |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3649 } |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3650 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3651 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) |
3313 | 3652 { |
3653 /* estimate the frame's complexity as a sum of weighted dwt coefs. | |
3654 * FIXME we know exact mv bits at this point, | |
3655 * but ratecontrol isn't set up to include them. */ | |
3656 uint32_t coef_sum= 0; | |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3657 int level, orientation, delta_qlog; |
3313 | 3658 |
3659 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3660 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3661 SubBand *b= &s->plane[0].band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3662 IDWTELEM *buf= b->ibuf; |
3313 | 3663 const int w= b->width; |
3664 const int h= b->height; | |
3665 const int stride= b->stride; | |
4594 | 3666 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
3313 | 3667 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3668 const int qdiv= (1<<16)/qmul; | |
3669 int x, y; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3670 //FIXME this is ugly |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3671 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3672 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3673 buf[x+y*stride]= b->buf[x+y*stride]; |
3313 | 3674 if(orientation==0) |
3675 decorrelate(s, b, buf, stride, 1, 0); | |
3676 for(y=0; y<h; y++) | |
3677 for(x=0; x<w; x++) | |
3678 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | |
3679 } | |
3680 } | |
3681 | |
3682 /* ugly, ratecontrol just takes a sqrt again */ | |
3683 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | |
3684 assert(coef_sum < INT_MAX); | |
3685 | |
3686 if(pict->pict_type == I_TYPE){ | |
3687 s->m.current_picture.mb_var_sum= coef_sum; | |
3688 s->m.current_picture.mc_mb_var_sum= 0; | |
3689 }else{ | |
3690 s->m.current_picture.mc_mb_var_sum= coef_sum; | |
3691 s->m.current_picture.mb_var_sum= 0; | |
3692 } | |
3693 | |
3694 pict->quality= ff_rate_estimate_qscale(&s->m, 1); | |
3766 | 3695 if (pict->quality < 0) |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
3696 return INT_MIN; |
3313 | 3697 s->lambda= pict->quality * 3/2; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3698 delta_qlog= qscale2qlog(pict->quality) - s->qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3699 s->qlog+= delta_qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3700 return delta_qlog; |
3313 | 3701 } |
2138 | 3702 |
3703 static void calculate_vissual_weight(SnowContext *s, Plane *p){ | |
3704 int width = p->width; | |
3705 int height= p->height; | |
2198 | 3706 int level, orientation, x, y; |
2138 | 3707 |
3708 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3709 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3710 SubBand *b= &p->band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3711 IDWTELEM *ibuf= b->ibuf; |
2138 | 3712 int64_t error=0; |
2967 | 3713 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3714 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3715 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3716 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2138 | 3717 for(y=0; y<height; y++){ |
3718 for(x=0; x<width; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3719 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
2138 | 3720 error += d*d; |
3721 } | |
3722 } | |
3723 | |
3724 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
2164 | 3725 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/); |
2138 | 3726 } |
3727 } | |
3728 } | |
3729 | |
3730 static int encode_init(AVCodecContext *avctx) | |
3731 { | |
3732 SnowContext *s = avctx->priv_data; | |
2198 | 3733 int plane_index; |
2138 | 3734 |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
3735 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ |
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2610
diff
changeset
|
3736 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n" |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
3737 "use vstrict=-2 / -strict -2 to use it anyway\n"); |
2151 | 3738 return -1; |
3739 } | |
2967 | 3740 |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3741 if(avctx->prediction_method == DWT_97 |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3742 && (avctx->flags & CODEC_FLAG_QSCALE) |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3743 && avctx->global_quality == 0){ |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3744 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n"); |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3745 return -1; |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3746 } |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3747 |
2138 | 3748 common_init(avctx); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3749 alloc_blocks(s); |
2967 | 3750 |
2138 | 3751 s->version=0; |
2967 | 3752 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3753 s->m.avctx = avctx; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3754 s->m.flags = avctx->flags; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3755 s->m.bit_rate= avctx->bit_rate; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3756 |
2138 | 3757 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
3758 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3759 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3760 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); |
2138 | 3761 h263_encode_init(&s->m); //mv_penalty |
3762 | |
3314 | 3763 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); |
3764 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3765 if(avctx->flags&CODEC_FLAG_PASS1){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3766 if(!avctx->stats_out) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3767 avctx->stats_out = av_mallocz(256); |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3768 } |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3769 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3770 if(ff_rate_control_init(&s->m) < 0) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3771 return -1; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3772 } |
3313 | 3773 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3774 |
2138 | 3775 for(plane_index=0; plane_index<3; plane_index++){ |
3776 calculate_vissual_weight(s, &s->plane[plane_index]); | |
3777 } | |
2967 | 3778 |
3779 | |
2138 | 3780 avctx->coded_frame= &s->current_picture; |
3781 switch(avctx->pix_fmt){ | |
3782 // case PIX_FMT_YUV444P: | |
3783 // case PIX_FMT_YUV422P: | |
3784 case PIX_FMT_YUV420P: | |
3785 case PIX_FMT_GRAY8: | |
3786 // case PIX_FMT_YUV411P: | |
3787 // case PIX_FMT_YUV410P: | |
3788 s->colorspace_type= 0; | |
3789 break; | |
4494
ce643a22f049
Replace deprecated PIX_FMT names by the newer variants.
diego
parents:
4436
diff
changeset
|
3790 /* case PIX_FMT_RGB32: |
2138 | 3791 s->colorspace= 1; |
3792 break;*/ | |
3793 default: | |
3794 av_log(avctx, AV_LOG_ERROR, "format not supported\n"); | |
3795 return -1; | |
3796 } | |
3797 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
3798 s->chroma_h_shift= 1; | |
3799 s->chroma_v_shift= 1; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3800 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3801 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3802 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3803 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3804 s->avctx->get_buffer(s->avctx, &s->input_picture); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3805 |
3314 | 3806 if(s->avctx->me_method == ME_ITER){ |
3807 int i; | |
3808 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
3809 for(i=0; i<s->max_ref_frames; i++){ | |
3810 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
3811 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
3812 } | |
3813 } | |
3814 | |
2138 | 3815 return 0; |
3816 } | |
3817 | |
3818 static int frame_start(SnowContext *s){ | |
3819 AVFrame tmp; | |
2187 | 3820 int w= s->avctx->width; //FIXME round up to x16 ? |
3821 int h= s->avctx->height; | |
2138 | 3822 |
2187 | 3823 if(s->current_picture.data[0]){ |
3824 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); | |
3825 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
3826 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
3827 } | |
3828 | |
3314 | 3829 tmp= s->last_picture[s->max_ref_frames-1]; |
3830 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
3831 s->last_picture[0]= s->current_picture; | |
2138 | 3832 s->current_picture= tmp; |
2967 | 3833 |
3314 | 3834 if(s->keyframe){ |
3835 s->ref_frames= 0; | |
3836 }else{ | |
3837 int i; | |
3838 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
3839 if(i && s->last_picture[i-1].key_frame) | |
3840 break; | |
3841 s->ref_frames= i; | |
3842 } | |
3843 | |
2138 | 3844 s->current_picture.reference= 1; |
3845 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
3846 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
3847 return -1; | |
3848 } | |
2967 | 3849 |
3314 | 3850 s->current_picture.key_frame= s->keyframe; |
3851 | |
2138 | 3852 return 0; |
3853 } | |
3854 | |
3855 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
3856 SnowContext *s = avctx->priv_data; | |
2335 | 3857 RangeCoder * const c= &s->c; |
2138 | 3858 AVFrame *pict = data; |
3859 const int width= s->avctx->width; | |
3860 const int height= s->avctx->height; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3861 int level, orientation, plane_index, i, y; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3862 uint8_t rc_header_bak[sizeof(s->header_state)]; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3863 uint8_t rc_block_bak[sizeof(s->block_state)]; |
2138 | 3864 |
2335 | 3865 ff_init_range_encoder(c, buf, buf_size); |
3866 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2967 | 3867 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3868 for(i=0; i<3; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3869 int shift= !!i; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3870 for(y=0; y<(height>>shift); y++) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3871 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3872 &pict->data[i][y * pict->linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3873 width>>shift); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3874 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3875 s->new_picture = *pict; |
2138 | 3876 |
3313 | 3877 s->m.picture_number= avctx->frame_number; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3878 if(avctx->flags&CODEC_FLAG_PASS2){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3879 s->m.pict_type = |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3880 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3881 s->keyframe= pict->pict_type==FF_I_TYPE; |
3766 | 3882 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { |
3193 | 3883 pict->quality= ff_rate_estimate_qscale(&s->m, 0); |
3766 | 3884 if (pict->quality < 0) |
3885 return -1; | |
3886 } | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3887 }else{ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3888 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
3313 | 3889 s->m.pict_type= |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3890 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3891 } |
2967 | 3892 |
3313 | 3893 if(s->pass1_rc && avctx->frame_number == 0) |
3894 pict->quality= 2*FF_QP2LAMBDA; | |
2161 | 3895 if(pict->quality){ |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3896 s->qlog= qscale2qlog(pict->quality); |
3313 | 3897 s->lambda = pict->quality * 3/2; |
3898 } | |
3899 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ | |
2161 | 3900 s->qlog= LOSSLESS_QLOG; |
3313 | 3901 s->lambda = 0; |
3902 }//else keep previous frame's qlog until after motion est | |
2138 | 3903 |
3904 frame_start(s); | |
3905 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3906 s->m.current_picture_ptr= &s->m.current_picture; |
2138 | 3907 if(pict->pict_type == P_TYPE){ |
3908 int block_width = (width +15)>>4; | |
3909 int block_height= (height+15)>>4; | |
3910 int stride= s->current_picture.linesize[0]; | |
2967 | 3911 |
2138 | 3912 assert(s->current_picture.data[0]); |
3314 | 3913 assert(s->last_picture[0].data[0]); |
2967 | 3914 |
2138 | 3915 s->m.avctx= s->avctx; |
3916 s->m.current_picture.data[0]= s->current_picture.data[0]; | |
3314 | 3917 s->m. last_picture.data[0]= s->last_picture[0].data[0]; |
2138 | 3918 s->m. new_picture.data[0]= s-> input_picture.data[0]; |
3919 s->m. last_picture_ptr= &s->m. last_picture; | |
3920 s->m.linesize= | |
3921 s->m. last_picture.linesize[0]= | |
3922 s->m. new_picture.linesize[0]= | |
3923 s->m.current_picture.linesize[0]= stride; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3924 s->m.uvlinesize= s->current_picture.linesize[1]; |
2138 | 3925 s->m.width = width; |
3926 s->m.height= height; | |
3927 s->m.mb_width = block_width; | |
3928 s->m.mb_height= block_height; | |
3929 s->m.mb_stride= s->m.mb_width+1; | |
3930 s->m.b8_stride= 2*s->m.mb_width+1; | |
3931 s->m.f_code=1; | |
3932 s->m.pict_type= pict->pict_type; | |
3933 s->m.me_method= s->avctx->me_method; | |
3934 s->m.me.scene_change_score=0; | |
3935 s->m.flags= s->avctx->flags; | |
3936 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
3937 s->m.out_format= FMT_H263; | |
3938 s->m.unrestricted_mv= 1; | |
3939 | |
3313 | 3940 s->m.lambda = s->lambda; |
2138 | 3941 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3942 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3943 |
2138 | 3944 s->m.dsp= s->dsp; //move |
3945 ff_init_me(&s->m); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3946 s->dsp= s->m.dsp; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3947 } |
2967 | 3948 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3949 if(s->pass1_rc){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3950 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3951 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3952 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3953 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3954 redo_frame: |
2967 | 3955 |
3313 | 3956 s->m.pict_type = pict->pict_type; |
2138 | 3957 s->qbias= pict->pict_type == P_TYPE ? 2 : 0; |
3958 | |
3959 encode_header(s); | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3960 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3961 encode_blocks(s, 1); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
3962 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; |
2967 | 3963 |
2138 | 3964 for(plane_index=0; plane_index<3; plane_index++){ |
3965 Plane *p= &s->plane[plane_index]; | |
3966 int w= p->width; | |
3967 int h= p->height; | |
3968 int x, y; | |
2198 | 3969 // int bits= put_bits_count(&s->c.pb); |
2138 | 3970 |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
3971 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
2138 | 3972 //FIXME optimize |
3973 if(pict->data[plane_index]) //FIXME gray hack | |
3974 for(y=0; y<h; y++){ | |
3975 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3976 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; |
2138 | 3977 } |
3978 } | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3979 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); |
2967 | 3980 |
3981 if( plane_index==0 | |
3982 && pict->pict_type == P_TYPE | |
3313 | 3983 && !(avctx->flags&CODEC_FLAG_PASS2) |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3984 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ |
2335 | 3985 ff_init_range_encoder(c, buf, buf_size); |
3986 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3987 pict->pict_type= FF_I_TYPE; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3988 s->keyframe=1; |
3314 | 3989 s->current_picture.key_frame=1; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3990 goto redo_frame; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3991 } |
2967 | 3992 |
2161 | 3993 if(s->qlog == LOSSLESS_QLOG){ |
3994 for(y=0; y<h; y++){ | |
3995 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3996 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; |
2161 | 3997 } |
3998 } | |
5575 | 3999 }else{ |
4000 for(y=0; y<h; y++){ | |
4001 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4002 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; |
5575 | 4003 } |
4004 } | |
2161 | 4005 } |
2967 | 4006 |
2164 | 4007 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2161 | 4008 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4009 if(s->pass1_rc && plane_index==0){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4010 int delta_qlog = ratecontrol_1pass(s, pict); |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
4011 if (delta_qlog <= INT_MIN) |
3766 | 4012 return -1; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4013 if(delta_qlog){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4014 //reordering qlog in the bitstream would eliminate this reset |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4015 ff_init_range_encoder(c, buf, buf_size); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4016 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4017 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4018 encode_header(s); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4019 encode_blocks(s, 0); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4020 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4021 } |
3313 | 4022 |
2138 | 4023 for(level=0; level<s->spatial_decomposition_count; level++){ |
4024 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4025 SubBand *b= &p->band[level][orientation]; | |
2967 | 4026 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4027 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); |
2138 | 4028 if(orientation==0) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4029 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4030 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); |
2138 | 4031 assert(b->parent==NULL || b->parent->stride == b->stride*2); |
4032 if(orientation==0) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4033 correlate(s, b, b->ibuf, b->stride, 1, 0); |
2138 | 4034 } |
4035 } | |
4036 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits); | |
4037 | |
4038 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4039 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4040 SubBand *b= &p->band[level][orientation]; | |
4041 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4042 dequantize(s, b, b->ibuf, b->stride); |
2138 | 4043 } |
4044 } | |
2161 | 4045 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4046 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2161 | 4047 if(s->qlog == LOSSLESS_QLOG){ |
4048 for(y=0; y<h; y++){ | |
4049 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4050 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; |
2161 | 4051 } |
4052 } | |
4053 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
4054 {START_TIMER |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4055 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
4056 STOP_TIMER("pred-conv")} |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4057 }else{ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4058 //ME/MC only |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4059 if(pict->pict_type == I_TYPE){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4060 for(y=0; y<h; y++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4061 for(x=0; x<w; x++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4062 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4063 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4064 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4065 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4066 }else{ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4067 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4068 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4069 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4070 } |
2138 | 4071 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
4072 int64_t error= 0; | |
2967 | 4073 |
2138 | 4074 if(pict->data[plane_index]) //FIXME gray hack |
4075 for(y=0; y<h; y++){ | |
4076 for(x=0; x<w; x++){ | |
2161 | 4077 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
2138 | 4078 error += d*d; |
4079 } | |
4080 } | |
4081 s->avctx->error[plane_index] += error; | |
2232 | 4082 s->current_picture.error[plane_index] = error; |
2138 | 4083 } |
4084 } | |
4085 | |
3314 | 4086 if(s->last_picture[s->max_ref_frames-1].data[0]) |
4087 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); | |
2138 | 4088 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4089 s->current_picture.coded_picture_number = avctx->frame_number; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4090 s->current_picture.pict_type = pict->pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4091 s->current_picture.quality = pict->quality; |
3313 | 4092 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
4093 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; | |
4094 s->m.current_picture.display_picture_number = | |
4095 s->m.current_picture.coded_picture_number = avctx->frame_number; | |
4096 s->m.current_picture.quality = pict->quality; | |
4097 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); | |
4098 if(s->pass1_rc) | |
3766 | 4099 if (ff_rate_estimate_qscale(&s->m, 0) < 0) |
4100 return -1; | |
3313 | 4101 if(avctx->flags&CODEC_FLAG_PASS1) |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4102 ff_write_pass1_stats(&s->m); |
3313 | 4103 s->m.last_pict_type = s->m.pict_type; |
4123
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4104 avctx->frame_bits = s->m.frame_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4105 avctx->mv_bits = s->m.mv_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4106 avctx->misc_bits = s->m.misc_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4107 avctx->p_tex_bits = s->m.p_tex_bits; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4108 |
2138 | 4109 emms_c(); |
2967 | 4110 |
2335 | 4111 return ff_rac_terminate(c); |
2138 | 4112 } |
4113 | |
4114 static void common_end(SnowContext *s){ | |
3314 | 4115 int plane_index, level, orientation, i; |
2192 | 4116 |
2138 | 4117 av_freep(&s->spatial_dwt_buffer); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4118 av_freep(&s->spatial_idwt_buffer); |
2138 | 4119 |
2967 | 4120 av_freep(&s->m.me.scratchpad); |
2138 | 4121 av_freep(&s->m.me.map); |
4122 av_freep(&s->m.me.score_map); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4123 av_freep(&s->m.obmc_scratchpad); |
2967 | 4124 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4125 av_freep(&s->block); |
2192 | 4126 |
3314 | 4127 for(i=0; i<MAX_REF_FRAMES; i++){ |
4128 av_freep(&s->ref_mvs[i]); | |
4129 av_freep(&s->ref_scores[i]); | |
4130 if(s->last_picture[i].data[0]) | |
4131 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
4132 } | |
4133 | |
2967 | 4134 for(plane_index=0; plane_index<3; plane_index++){ |
2192 | 4135 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
4136 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4137 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 4138 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4139 av_freep(&b->x_coeff); |
2192 | 4140 } |
4141 } | |
4142 } | |
2138 | 4143 } |
4144 | |
4145 static int encode_end(AVCodecContext *avctx) | |
4146 { | |
4147 SnowContext *s = avctx->priv_data; | |
4148 | |
4149 common_end(s); | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4150 av_free(avctx->stats_out); |
2138 | 4151 |
4152 return 0; | |
4153 } | |
4154 | |
4155 static int decode_init(AVCodecContext *avctx) | |
4156 { | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4157 SnowContext *s = avctx->priv_data; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4158 int block_size; |
2967 | 4159 |
2635 | 4160 avctx->pix_fmt= PIX_FMT_YUV420P; |
2138 | 4161 |
4162 common_init(avctx); | |
2967 | 4163 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4164 block_size = MB_SIZE >> s->block_max_depth; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4165 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_idwt_buffer); |
2967 | 4166 |
2138 | 4167 return 0; |
4168 } | |
4169 | |
4170 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ | |
4171 SnowContext *s = avctx->priv_data; | |
2335 | 4172 RangeCoder * const c= &s->c; |
2138 | 4173 int bytes_read; |
4174 AVFrame *picture = data; | |
2198 | 4175 int level, orientation, plane_index; |
2138 | 4176 |
2335 | 4177 ff_init_range_decoder(c, buf, buf_size); |
4178 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2138 | 4179 |
4180 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
4181 decode_header(s); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4182 if(!s->block) alloc_blocks(s); |
2138 | 4183 |
4184 frame_start(s); | |
4185 //keyframe flag dupliaction mess FIXME | |
4186 if(avctx->debug&FF_DEBUG_PICT_INFO) | |
4187 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
2967 | 4188 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4189 decode_blocks(s); |
2138 | 4190 |
4191 for(plane_index=0; plane_index<3; plane_index++){ | |
4192 Plane *p= &s->plane[plane_index]; | |
4193 int w= p->width; | |
4194 int h= p->height; | |
4195 int x, y; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4196 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
2967 | 4197 |
2138 | 4198 if(s->avctx->debug&2048){ |
4199 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4200 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
2138 | 4201 |
4202 for(y=0; y<h; y++){ | |
4203 for(x=0; x<w; x++){ | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
4204 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; |
2138 | 4205 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; |
4206 } | |
4207 } | |
4208 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4209 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4210 { START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4211 for(level=0; level<s->spatial_decomposition_count; level++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4212 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4213 SubBand *b= &p->band[level][orientation]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4214 unpack_coeffs(s, b, b->parent, orientation); |
2138 | 4215 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4216 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4217 STOP_TIMER("unpack coeffs"); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4218 } |
2138 | 4219 |
2562 | 4220 {START_TIMER |
4221 const int mb_h= s->b_height << s->block_max_depth; | |
4222 const int block_size = MB_SIZE >> s->block_max_depth; | |
4223 const int block_w = plane_index ? block_size/2 : block_size; | |
4224 int mb_y; | |
4225 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
4226 int yd=0, yq=0; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4227 int y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4228 int end_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4229 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4230 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2562 | 4231 for(mb_y=0; mb_y<=mb_h; mb_y++){ |
2967 | 4232 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4233 int slice_starty = block_w*mb_y; |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4234 int slice_h = block_w*(mb_y+1); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4235 if (!(s->keyframe || s->avctx->debug&512)){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4236 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4237 slice_h -= (block_w >> 1); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4238 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4239 |
2967 | 4240 { |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4241 START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4242 for(level=0; level<s->spatial_decomposition_count; level++){ |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4243 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4244 SubBand *b= &p->band[level][orientation]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4245 int start_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4246 int end_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4247 int our_mb_start = mb_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4248 int our_mb_end = (mb_y + 1); |
3012 | 4249 const int extra= 3; |
4250 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
4251 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4252 if (!(s->keyframe || s->avctx->debug&512)){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4253 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4254 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4255 } |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4256 start_y = FFMIN(b->height, start_y); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4257 end_y = FFMIN(b->height, end_y); |
2967 | 4258 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4259 if (start_y != end_y){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4260 if (orientation == 0){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4261 SubBand * correlate_band = &p->band[0][0]; |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4262 int correlate_end_y = FFMIN(b->height, end_y + 1); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4263 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4264 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4265 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4266 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4267 } |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4268 else |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4269 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4270 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4271 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4272 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4273 STOP_TIMER("decode_subband_slice"); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4274 } |
2967 | 4275 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4276 { START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4277 for(; yd<slice_h; yd+=4){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
4278 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4279 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4280 STOP_TIMER("idwt slice");} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4281 |
2967 | 4282 |
2161 | 4283 if(s->qlog == LOSSLESS_QLOG){ |
2562 | 4284 for(; yq<slice_h && yq<h; yq++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4285 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); |
2161 | 4286 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4287 line[x] <<= FRAC_BITS; |
2161 | 4288 } |
4289 } | |
4290 } | |
2562 | 4291 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4292 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); |
2967 | 4293 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4294 y = FFMIN(p->height, slice_starty); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4295 end_y = FFMIN(p->height, slice_h); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4296 while(y < end_y) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4297 slice_buffer_release(&s->sb, y++); |
2562 | 4298 } |
2967 | 4299 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4300 slice_buffer_flush(&s->sb); |
2967 | 4301 |
2562 | 4302 STOP_TIMER("idwt + predict_slices")} |
2138 | 4303 } |
2967 | 4304 |
2138 | 4305 emms_c(); |
4306 | |
3314 | 4307 if(s->last_picture[s->max_ref_frames-1].data[0]) |
4308 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); | |
2138 | 4309 |
2967 | 4310 if(!(s->avctx->debug&2048)) |
2138 | 4311 *picture= s->current_picture; |
4312 else | |
4313 *picture= s->mconly_picture; | |
2967 | 4314 |
2138 | 4315 *data_size = sizeof(AVFrame); |
2967 | 4316 |
2335 | 4317 bytes_read= c->bytestream - c->bytestream_start; |
4318 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
2138 | 4319 |
4320 return bytes_read; | |
4321 } | |
4322 | |
4323 static int decode_end(AVCodecContext *avctx) | |
4324 { | |
4325 SnowContext *s = avctx->priv_data; | |
4326 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4327 slice_buffer_destroy(&s->sb); |
2967 | 4328 |
2138 | 4329 common_end(s); |
4330 | |
4331 return 0; | |
4332 } | |
4333 | |
4334 AVCodec snow_decoder = { | |
4335 "snow", | |
4336 CODEC_TYPE_VIDEO, | |
4337 CODEC_ID_SNOW, | |
4338 sizeof(SnowContext), | |
4339 decode_init, | |
4340 NULL, | |
4341 decode_end, | |
4342 decode_frame, | |
4343 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
4344 NULL | |
4345 }; | |
4346 | |
5224 | 4347 #ifdef CONFIG_SNOW_ENCODER |
2138 | 4348 AVCodec snow_encoder = { |
4349 "snow", | |
4350 CODEC_TYPE_VIDEO, | |
4351 CODEC_ID_SNOW, | |
4352 sizeof(SnowContext), | |
4353 encode_init, | |
4354 encode_frame, | |
4355 encode_end, | |
4356 }; | |
2408
a6e4da1c28ee
Disable encoders patch by (Gianluigi Tiesi <mplayer netfarm it>)
michael
parents:
2368
diff
changeset
|
4357 #endif |
2138 | 4358 |
4359 | |
4360 #if 0 | |
4361 #undef malloc | |
4362 #undef free | |
4363 #undef printf | |
5380
389366aa3458
Fix the self tests which are contained in some codecs and are using random().
takis
parents:
5254
diff
changeset
|
4364 #undef random |
2138 | 4365 |
4366 int main(){ | |
4367 int width=256; | |
4368 int height=256; | |
4369 int buffer[2][width*height]; | |
4370 SnowContext s; | |
4371 int i; | |
4372 s.spatial_decomposition_count=6; | |
4373 s.spatial_decomposition_type=1; | |
2967 | 4374 |
2138 | 4375 printf("testing 5/3 DWT\n"); |
4376 for(i=0; i<width*height; i++) | |
4377 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; | |
2967 | 4378 |
2951 | 4379 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4380 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4381 |
2138 | 4382 for(i=0; i<width*height; i++) |
4383 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); | |
4384 | |
4385 printf("testing 9/7 DWT\n"); | |
4386 s.spatial_decomposition_type=0; | |
4387 for(i=0; i<width*height; i++) | |
4388 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; | |
2967 | 4389 |
2951 | 4390 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4391 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4392 |
2138 | 4393 for(i=0; i<width*height; i++) |
4001 | 4394 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); |
2967 | 4395 |
2951 | 4396 #if 0 |
2138 | 4397 printf("testing AC coder\n"); |
4398 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4399 ff_init_range_encoder(&s.c, buffer[0], 256*256); |
2138 | 4400 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4401 |
2138 | 4402 for(i=-256; i<256; i++){ |
4403 START_TIMER | |
4001 | 4404 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); |
2138 | 4405 STOP_TIMER("put_symbol") |
4406 } | |
2335 | 4407 ff_rac_terminate(&s.c); |
2138 | 4408 |
4409 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4410 ff_init_range_decoder(&s.c, buffer[0], 256*256); |
2138 | 4411 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4412 |
2138 | 4413 for(i=-256; i<256; i++){ |
4414 int j; | |
4415 START_TIMER | |
4416 j= get_symbol(&s.c, s.header_state, 1); | |
4417 STOP_TIMER("get_symbol") | |
4001 | 4418 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); |
2138 | 4419 } |
2951 | 4420 #endif |
2138 | 4421 { |
4422 int level, orientation, x, y; | |
4423 int64_t errors[8][4]; | |
4424 int64_t g=0; | |
4425 | |
4426 memset(errors, 0, sizeof(errors)); | |
4427 s.spatial_decomposition_count=3; | |
4428 s.spatial_decomposition_type=0; | |
4429 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4430 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4431 int w= width >> (s.spatial_decomposition_count-level); | |
4432 int h= height >> (s.spatial_decomposition_count-level); | |
4433 int stride= width << (s.spatial_decomposition_count-level); | |
4434 DWTELEM *buf= buffer[0]; | |
4435 int64_t error=0; | |
4436 | |
4437 if(orientation&1) buf+=w; | |
4438 if(orientation>1) buf+=stride>>1; | |
2967 | 4439 |
2138 | 4440 memset(buffer[0], 0, sizeof(int)*width*height); |
4441 buf[w/2 + h/2*stride]= 256*256; | |
2951 | 4442 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4443 for(y=0; y<height; y++){ |
4444 for(x=0; x<width; x++){ | |
4445 int64_t d= buffer[0][x + y*width]; | |
4446 error += d*d; | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4447 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); |
2138 | 4448 } |
4001 | 4449 if(FFABS(height/2-y)<9 && level==2) printf("\n"); |
2138 | 4450 } |
4451 error= (int)(sqrt(error)+0.5); | |
4452 errors[level][orientation]= error; | |
4453 if(g) g=ff_gcd(g, error); | |
4454 else g= error; | |
4455 } | |
4456 } | |
4457 printf("static int const visual_weight[][4]={\n"); | |
4458 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4459 printf(" {"); | |
4460 for(orientation=0; orientation<4; orientation++){ | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4461 printf("%8"PRId64",", errors[level][orientation]/g); |
2138 | 4462 } |
4463 printf("},\n"); | |
4464 } | |
4465 printf("};\n"); | |
4466 { | |
4467 int level=2; | |
4468 int orientation=3; | |
4469 int w= width >> (s.spatial_decomposition_count-level); | |
4470 int h= height >> (s.spatial_decomposition_count-level); | |
4471 int stride= width << (s.spatial_decomposition_count-level); | |
4472 DWTELEM *buf= buffer[0]; | |
4473 int64_t error=0; | |
4474 | |
4475 buf+=w; | |
4476 buf+=stride>>1; | |
2967 | 4477 |
2138 | 4478 memset(buffer[0], 0, sizeof(int)*width*height); |
4479 #if 1 | |
4480 for(y=0; y<height; y++){ | |
4481 for(x=0; x<width; x++){ | |
4482 int tab[4]={0,2,3,1}; | |
4483 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; | |
4484 } | |
4485 } | |
2951 | 4486 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4487 #else |
4488 for(y=0; y<h; y++){ | |
4489 for(x=0; x<w; x++){ | |
4490 buf[x + y*stride ]=169; | |
4491 buf[x + y*stride-w]=64; | |
4492 } | |
4493 } | |
2951 | 4494 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4495 #endif |
4496 for(y=0; y<height; y++){ | |
4497 for(x=0; x<width; x++){ | |
4498 int64_t d= buffer[0][x + y*width]; | |
4499 error += d*d; | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4500 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); |
2138 | 4501 } |
4001 | 4502 if(FFABS(height/2-y)<9) printf("\n"); |
2138 | 4503 } |
4504 } | |
4505 | |
4506 } | |
4507 return 0; | |
4508 } | |
4509 #endif | |
4510 |