Mercurial > libavcodec.hg
annotate snow.c @ 5925:4127277ac925 libavcodec
Remove unused functions, fixes the warnings:
snow.c:1163: warning: 'spatial_compose53i' defined but not used
snow.c:1313: warning: 'spatial_compose97i' defined but not used
author | diego |
---|---|
date | Fri, 16 Nov 2007 23:15:39 +0000 |
parents | cf1eed15eb87 |
children | bd5f4dc81fbe |
rev | line source |
---|---|
2138 | 1 /* |
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2138 | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2138 | 10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2138 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3035
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2138 | 19 */ |
20 | |
21 #include "avcodec.h" | |
22 #include "dsputil.h" | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
23 #include "snow.h" |
2335 | 24 |
25 #include "rangecoder.h" | |
2138 | 26 |
27 #include "mpegvideo.h" | |
28 | |
29 #undef NDEBUG | |
30 #include <assert.h> | |
31 | |
32 static const int8_t quant3[256]={ | |
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
49 }; | |
50 static const int8_t quant3b[256]={ | |
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 }; | |
2596 | 68 static const int8_t quant3bA[256]={ |
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 }; | |
2138 | 86 static const int8_t quant5[256]={ |
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
103 }; | |
104 static const int8_t quant7[256]={ | |
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
121 }; | |
122 static const int8_t quant9[256]={ | |
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
139 }; | |
140 static const int8_t quant11[256]={ | |
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
157 }; | |
158 static const int8_t quant13[256]={ | |
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
175 }; | |
176 | |
177 #if 0 //64*cubic | |
178 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 211 //error:0.000022 |
212 }; | |
213 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
2138 | 230 //error:0.000033 |
231 }; | |
232 #elif 1 // 64*linear | |
233 static const uint8_t obmc32[1024]={ | |
3206 | 234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, |
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
2138 | 266 //error:0.000020 |
267 }; | |
268 static const uint8_t obmc16[256]={ | |
3206 | 269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, |
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, | |
2138 | 285 //error:0.000015 |
286 }; | |
287 #else //64*cos | |
288 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 321 //error:0.000022 |
322 }; | |
/*
 * 16x16 OBMC weight window, stored row-major (16 entries per row).
 * The table is symmetric both horizontally and vertically and every
 * entry is a multiple of 4.
 * NOTE(review): this definition sits just before an "#endif" whose opening
 * conditional is not nearby - confirm which preprocessor branch is active.
 */
static const uint8_t obmc16[256]={
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
//error:0.000022
};
5910 | 342 #endif /* 0 */ |
2138 | 343 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
//linear *64
/*
 * 8x8 OBMC weight window, stored row-major (8 entries per row).
 * The window is separable: entry (y, x) equals 4 * w[y] * w[x] with
 * w = {1, 3, 5, 7, 7, 5, 3, 1}.
 */
static const uint8_t obmc8[64]={
  4, 12, 20, 28, 28, 20, 12,  4,
 12, 36, 60, 84, 84, 60, 36, 12,
 20, 60,100,140,140,100, 60, 20,
 28, 84,140,196,196,140, 84, 28,
 28, 84,140,196,196,140, 84, 28,
 20, 60,100,140,140,100, 60, 20,
 12, 36, 60, 84, 84, 60, 36, 12,
  4, 12, 20, 28, 28, 20, 12,  4,
//error:0.000000
};
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
356 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
//linear *64
/*
 * 4x4 OBMC weight window, stored row-major (4 entries per row).
 * The window is separable: entry (y, x) equals 16 * w[y] * w[x] with
 * w = {1, 3, 3, 1}.
 */
static const uint8_t obmc4[16]={
 16, 48, 48, 16,
 48,144,144, 48,
 48,144,144, 48,
 16, 48, 48, 16,
//error:0.000000
};
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
365 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
366 static const uint8_t *obmc_tab[4]={ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
367 obmc32, obmc16, obmc8, obmc4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
368 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
369 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
371 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
/**
 * One node of the block tree used for motion compensation.
 * The BLOCK_* macros below are the flag values stored in `type`.
 */
typedef struct BlockNode{
    int16_t mx;         ///< motion vector, x component
    int16_t my;         ///< motion vector, y component
    uint8_t ref;        ///< reference frame number
    uint8_t color[3];   ///< one value per plane (null_block uses 128 for each)
    uint8_t type;       ///< bitmask of BLOCK_* flags
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
384 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
385 static const BlockNode null_block= { //FIXME add border maybe |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
386 .color= {128,128,128}, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
387 .mx= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
388 .my= 0, |
3314 | 389 .ref= 0, |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
390 .type= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
391 .level= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
392 }; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
393 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
#define LOG2_MB_SIZE 4                  /* log2 of the macroblock edge length */
#define MB_SIZE (1<<LOG2_MB_SIZE)       /* macroblock edge length: 16 */
#define ENCODER_EXTRA_BITS 4
#define HTAPS_MAX 8                     /* upper bound used to size the hcoeff arrays in Plane */
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
398 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
/** A coefficient value paired with its x position. */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
403 |
2138 | 404 typedef struct SubBand{ |
405 int level; | |
406 int stride; | |
407 int width; | |
408 int height; | |
409 int qlog; ///< log(qscale)/log[2^(1/6)] | |
410 DWTELEM *buf; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
411 IDWTELEM *ibuf; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
412 int buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
413 int buf_y_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
414 int stride_line; ///< Stride measured in lines, not pixels. |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
415 x_and_coeff * x_coeff; |
2138 | 416 struct SubBand *parent; |
417 uint8_t state[/*7*2*/ 7 + 512][32]; | |
418 }SubBand; | |
419 | |
420 typedef struct Plane{ | |
421 int width; | |
422 int height; | |
423 SubBand band[MAX_DECOMPOSITIONS][4]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
424 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
425 int htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
426 int8_t hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
427 int diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
428 int fast_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
429 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
430 int last_htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
431 int8_t last_hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
432 int last_diag_mc; |
2138 | 433 }Plane; |
434 | |
435 typedef struct SnowContext{ | |
4588
fc155ff94878
cosmetics: Fix another common typo, dependAnt --> dependEnt.
diego
parents:
4494
diff
changeset
|
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 437 |
438 AVCodecContext *avctx; | |
2335 | 439 RangeCoder c; |
2138 | 440 DSPContext dsp; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
441 AVFrame new_picture; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
442 AVFrame input_picture; ///< new_picture with the internal linesizes |
2138 | 443 AVFrame current_picture; |
3314 | 444 AVFrame last_picture[MAX_REF_FRAMES]; |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; |
2138 | 446 AVFrame mconly_picture; |
447 // uint8_t q_context[16]; | |
448 uint8_t header_state[32]; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
449 uint8_t block_state[128 + 32*128]; |
2138 | 450 int keyframe; |
2199 | 451 int always_reset; |
2138 | 452 int version; |
453 int spatial_decomposition_type; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
454 int last_spatial_decomposition_type; |
2138 | 455 int temporal_decomposition_type; |
456 int spatial_decomposition_count; | |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
457 int last_spatial_decomposition_count; |
2138 | 458 int temporal_decomposition_count; |
3314 | 459 int max_ref_frames; |
460 int ref_frames; | |
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | |
462 uint32_t *ref_scores[MAX_REF_FRAMES]; | |
2138 | 463 DWTELEM *spatial_dwt_buffer; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
464 IDWTELEM *spatial_idwt_buffer; |
2138 | 465 int colorspace_type; |
466 int chroma_h_shift; | |
467 int chroma_v_shift; | |
468 int spatial_scalability; | |
469 int qlog; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
470 int last_qlog; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
471 int lambda; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
472 int lambda2; |
3313 | 473 int pass1_rc; |
2138 | 474 int mv_scale; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
475 int last_mv_scale; |
2138 | 476 int qbias; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
477 int last_qbias; |
2138 | 478 #define QBIAS_SHIFT 3 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
479 int b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
480 int b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
481 int block_max_depth; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
482 int last_block_max_depth; |
2138 | 483 Plane plane[MAX_PLANES]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
484 BlockNode *block; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
485 #define ME_CACHE_SIZE 1024 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
486 int me_cache[ME_CACHE_SIZE]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
487 int me_cache_generation; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
488 slice_buffer sb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
489 |
4588
fc155ff94878
cosmetics: Fix another common typo, dependAnt --> dependEnt.
diego
parents:
4494
diff
changeset
|
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 491 }SnowContext; |
492 | |
2562 | 493 typedef struct { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
494 IDWTELEM *b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
495 IDWTELEM *b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
496 IDWTELEM *b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
497 IDWTELEM *b3; |
2562 | 498 int y; |
499 } dwt_compose_t; | |
500 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
/*
 * Yield the buffer attached to line `line_num`, calling
 * slice_buffer_load_line() only when that line has no buffer yet.
 * Both arguments are expanded more than once, so they must be free of
 * side effects.
 */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
503 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
504 static void iterative_me(SnowContext *s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
505 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
507 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
508 int i; |
2967 | 509 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
510 buf->base_buffer = base_buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
511 buf->line_count = line_count; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
512 buf->line_width = line_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
513 buf->data_count = max_allocated_lines; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
2967 | 516 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
517 for (i = 0; i < max_allocated_lines; i++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
518 { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
520 } |
2967 | 521 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
522 buf->data_stack_top = max_allocated_lines - 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
523 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
524 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
526 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
527 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
528 IDWTELEM * buffer; |
2967 | 529 |
530 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); | |
531 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
532 assert(buf->data_stack_top >= 0); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
533 // assert(!buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
534 if (buf->line[line]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
535 return buf->line[line]; |
2967 | 536 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
537 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
538 buffer = buf->data_stack[buf->data_stack_top]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
539 buf->data_stack_top--; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
540 buf->line[line] = buffer; |
2967 | 541 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
542 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); |
2967 | 543 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
544 return buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
545 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
546 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
547 static void slice_buffer_release(slice_buffer * buf, int line) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
548 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
549 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
550 IDWTELEM * buffer; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
551 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
552 assert(line >= 0 && line < buf->line_count); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
553 assert(buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
554 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
555 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
556 buffer = buf->line[line]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
557 buf->data_stack_top++; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
558 buf->data_stack[buf->data_stack_top] = buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
559 buf->line[line] = NULL; |
2967 | 560 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
561 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
562 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
563 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
564 static void slice_buffer_flush(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
565 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
566 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
567 for (i = 0; i < buf->line_count; i++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
568 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
569 if (buf->line[i]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
570 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
571 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
572 slice_buffer_release(buf, i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
573 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
574 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
575 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
576 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
577 static void slice_buffer_destroy(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
578 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
579 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
580 slice_buffer_flush(buf); |
2967 | 581 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
582 for (i = buf->data_count - 1; i >= 0; i--) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
583 { |
3190 | 584 av_freep(&buf->data_stack[i]); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
585 } |
3190 | 586 av_freep(&buf->data_stack); |
587 av_freep(&buf->line); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
588 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
589 |
2979 | 590 #ifdef __sgi |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
591 // Avoid a name clash on SGI IRIX |
2979 | 592 #undef qexp |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
593 #endif |
2246 | 594 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
2600 | 595 static uint8_t qexp[QROOT]; |
2138 | 596 |
/**
 * Reflect an index into the closed range [0, m].
 *
 * Values outside the range are folded back by repeated reflection at 0 and
 * at m (e.g. -1 -> 1, m+1 -> m-1).
 *
 * @param v index to fold, may be negative or larger than m
 * @param m largest valid index
 * @return  the folded index; 0 when m is 0
 */
static inline int mirror(int v, int m){
    /* With m == 0 the reflection below only flips the sign of v and would
     * loop forever for any v != 0; the only valid index is 0. */
    if(!m)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
604 | |
2335 | 605 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
2138 | 606 int i; |
607 | |
608 if(v){ | |
4001 | 609 const int a= FFABS(v); |
2138 | 610 const int e= av_log2(a); |
611 #if 1 | |
2967 | 612 const int el= FFMIN(e, 10); |
2335 | 613 put_rac(c, state+0, 0); |
2138 | 614 |
615 for(i=0; i<el; i++){ | |
2335 | 616 put_rac(c, state+1+i, 1); //1..10 |
2138 | 617 } |
618 for(; i<e; i++){ | |
2335 | 619 put_rac(c, state+1+9, 1); //1..10 |
2138 | 620 } |
2335 | 621 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 622 |
623 for(i=e-1; i>=el; i--){ | |
2335 | 624 put_rac(c, state+22+9, (a>>i)&1); //22..31 |
2138 | 625 } |
626 for(; i>=0; i--){ | |
2335 | 627 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 628 } |
629 | |
630 if(is_signed) | |
2335 | 631 put_rac(c, state+11 + el, v < 0); //11..21 |
2138 | 632 #else |
2967 | 633 |
2335 | 634 put_rac(c, state+0, 0); |
2138 | 635 if(e<=9){ |
636 for(i=0; i<e; i++){ | |
2335 | 637 put_rac(c, state+1+i, 1); //1..10 |
2138 | 638 } |
2335 | 639 put_rac(c, state+1+i, 0); |
2138 | 640 |
641 for(i=e-1; i>=0; i--){ | |
2335 | 642 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 643 } |
644 | |
645 if(is_signed) | |
2335 | 646 put_rac(c, state+11 + e, v < 0); //11..21 |
2138 | 647 }else{ |
648 for(i=0; i<e; i++){ | |
2335 | 649 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
2138 | 650 } |
2335 | 651 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 652 |
653 for(i=e-1; i>=0; i--){ | |
2335 | 654 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
2138 | 655 } |
656 | |
657 if(is_signed) | |
2335 | 658 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
2138 | 659 } |
5910 | 660 #endif /* 1 */ |
2138 | 661 }else{ |
2335 | 662 put_rac(c, state+0, 1); |
2138 | 663 } |
664 } | |
665 | |
2335 | 666 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
667 if(get_rac(c, state+0)) | |
2138 | 668 return 0; |
669 else{ | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
670 int i, e, a; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
671 e= 0; |
2335 | 672 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
673 e++; |
2138 | 674 } |
675 | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
676 a= 1; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
677 for(i=e-1; i>=0; i--){ |
2335 | 678 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
679 } |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
680 |
2335 | 681 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
2138 | 682 return -a; |
683 else | |
684 return a; | |
685 } | |
686 } | |
687 | |
2335 | 688 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
689 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
690 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
691 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
692 assert(v>=0); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
693 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
694 |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
695 while(v >= r){ |
2335 | 696 put_rac(c, state+4+log2, 1); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
697 v -= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
698 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
699 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
700 } |
2335 | 701 put_rac(c, state+4+log2, 0); |
2967 | 702 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
703 for(i=log2-1; i>=0; i--){ |
2335 | 704 put_rac(c, state+31-i, (v>>i)&1); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
705 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
706 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
707 |
2335 | 708 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
709 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
710 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
711 int v=0; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
712 |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
713 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
714 |
2335 | 715 while(get_rac(c, state+4+log2)){ |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
716 v+= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
717 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
718 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
719 } |
2967 | 720 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
721 for(i=log2-1; i>=0; i--){ |
2335 | 722 v+= get_rac(c, state+31-i)<<i; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
723 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
724 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
725 return v; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
726 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
727 |
5627 | 728 static av_always_inline void |
729 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
730 int dst_step, int src_step, int ref_step, | |
731 int width, int mul, int add, int shift, | |
732 int highpass, int inverse){ | |
2138 | 733 const int mirror_left= !highpass; |
734 const int mirror_right= (width&1) ^ highpass; | |
735 const int w= (width>>1) - 1 + (highpass & width); | |
736 int i; | |
737 | |
738 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
739 if(mirror_left){ | |
740 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
741 dst += dst_step; | |
742 src += src_step; | |
743 } | |
2967 | 744 |
2138 | 745 for(i=0; i<w; i++){ |
5627 | 746 dst[i*dst_step] = |
747 LIFT(src[i*src_step], | |
748 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
749 inverse); | |
2138 | 750 } |
2967 | 751 |
2138 | 752 if(mirror_right){ |
5627 | 753 dst[w*dst_step] = |
754 LIFT(src[w*src_step], | |
755 ((mul*2*ref[w*ref_step]+add)>>shift), | |
756 inverse); | |
2138 | 757 } |
758 } | |
759 | |
5627 | 760 static av_always_inline void |
761 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
762 int dst_step, int src_step, int ref_step, | |
763 int width, int mul, int add, int shift, | |
764 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
765 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
766 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
767 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
768 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
769 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
770 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
771 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
772 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
773 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
774 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
775 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
776 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
777 for(i=0; i<w; i++){ |
5627 | 778 dst[i*dst_step] = |
779 LIFT(src[i*src_step], | |
780 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
781 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
782 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
783 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
784 if(mirror_right){ |
5627 | 785 dst[w*dst_step] = |
786 LIFT(src[w*src_step], | |
787 ((mul*2*ref[w*ref_step]+add)>>shift), | |
788 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
789 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
790 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
791 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
792 #ifndef liftS |
5627 | 793 static av_always_inline void |
794 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
795 int dst_step, int src_step, int ref_step, | |
796 int width, int mul, int add, int shift, | |
797 int highpass, int inverse){ | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
798 const int mirror_left= !highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
799 const int mirror_right= (width&1) ^ highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
800 const int w= (width>>1) - 1 + (highpass & width); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
801 int i; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
802 |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
803 assert(shift == 4); |
5627 | 804 #define LIFTS(src, ref, inv) \ |
805 ((inv) ? \ | |
806 (src) + (((ref) + 4*(src))>>shift): \ | |
807 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
808 if(mirror_left){ |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
809 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
810 dst += dst_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
811 src += src_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
812 } |
2967 | 813 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
814 for(i=0; i<w; i++){ |
5627 | 815 dst[i*dst_step] = |
816 LIFTS(src[i*src_step], | |
817 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
818 inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
819 } |
2967 | 820 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
821 if(mirror_right){ |
5627 | 822 dst[w*dst_step] = |
823 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
824 } |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
825 } |
5627 | 826 static av_always_inline void |
827 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
828 int dst_step, int src_step, int ref_step, | |
829 int width, int mul, int add, int shift, | |
830 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
831 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
832 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
833 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
834 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
835 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
836 assert(shift == 4); |
5627 | 837 #define LIFTS(src, ref, inv) \ |
838 ((inv) ? \ | |
839 (src) + (((ref) + 4*(src))>>shift): \ | |
840 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
841 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
842 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
843 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
844 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
845 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
846 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
847 for(i=0; i<w; i++){ |
5627 | 848 dst[i*dst_step] = |
849 LIFTS(src[i*src_step], | |
850 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
851 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
852 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
853 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
854 if(mirror_right){ |
5627 | 855 dst[w*dst_step] = |
856 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
857 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
858 } |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
859 #endif |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
860 |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
861 static void horizontal_decompose53i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
862 DWTELEM temp[width]; |
2138 | 863 const int width2= width>>1; |
2893 | 864 int x; |
2138 | 865 const int w2= (width+1)>>1; |
866 | |
867 for(x=0; x<width2; x++){ | |
868 temp[x ]= b[2*x ]; | |
869 temp[x+w2]= b[2*x + 1]; | |
870 } | |
871 if(width&1) | |
872 temp[x ]= b[2*x ]; | |
873 #if 0 | |
2893 | 874 { |
875 int A1,A2,A3,A4; | |
2138 | 876 A2= temp[1 ]; |
877 A4= temp[0 ]; | |
878 A1= temp[0+width2]; | |
879 A1 -= (A2 + A4)>>1; | |
880 A4 += (A1 + 1)>>1; | |
881 b[0+width2] = A1; | |
882 b[0 ] = A4; | |
883 for(x=1; x+1<width2; x+=2){ | |
884 A3= temp[x+width2]; | |
885 A4= temp[x+1 ]; | |
886 A3 -= (A2 + A4)>>1; | |
887 A2 += (A1 + A3 + 2)>>2; | |
888 b[x+width2] = A3; | |
889 b[x ] = A2; | |
890 | |
891 A1= temp[x+1+width2]; | |
892 A2= temp[x+2 ]; | |
893 A1 -= (A2 + A4)>>1; | |
894 A4 += (A1 + A3 + 2)>>2; | |
895 b[x+1+width2] = A1; | |
896 b[x+1 ] = A4; | |
897 } | |
898 A3= temp[width-1]; | |
899 A3 -= A2; | |
900 A2 += (A1 + A3 + 2)>>2; | |
901 b[width -1] = A3; | |
902 b[width2-1] = A2; | |
2893 | 903 } |
2967 | 904 #else |
2138 | 905 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
906 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
907 #endif | |
908 } | |
909 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
910 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 911 int i; |
2967 | 912 |
2138 | 913 for(i=0; i<width; i++){ |
914 b1[i] -= (b0[i] + b2[i])>>1; | |
915 } | |
916 } | |
917 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
918 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 919 int i; |
2967 | 920 |
2138 | 921 for(i=0; i<width; i++){ |
922 b1[i] += (b0[i] + b2[i] + 2)>>2; | |
923 } | |
924 } | |
925 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
926 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 927 int y; |
2138 | 928 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
929 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
2967 | 930 |
2138 | 931 for(y=-2; y<height; y+=2){ |
932 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
933 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
934 | |
935 {START_TIMER | |
2998 | 936 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
937 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
2138 | 938 STOP_TIMER("horizontal_decompose53i")} |
2967 | 939 |
2138 | 940 {START_TIMER |
2998 | 941 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
942 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
2138 | 943 STOP_TIMER("vertical_decompose53i*")} |
2967 | 944 |
2138 | 945 b0=b2; |
946 b1=b3; | |
947 } | |
948 } | |
949 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
950 static void horizontal_decompose97i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
951 DWTELEM temp[width]; |
2138 | 952 const int w2= (width+1)>>1; |
953 | |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
954 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
955 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
956 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
2138 | 957 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
958 } | |
959 | |
960 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
961 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 962 int i; |
2967 | 963 |
2138 | 964 for(i=0; i<width; i++){ |
965 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
966 } | |
967 } | |
968 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
969 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 970 int i; |
2967 | 971 |
2138 | 972 for(i=0; i<width; i++){ |
973 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
974 } | |
975 } | |
976 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
977 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 978 int i; |
2967 | 979 |
2138 | 980 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
981 #ifdef liftS |
2138 | 982 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
983 #else |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
984 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
985 #endif |
2138 | 986 } |
987 } | |
988 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
989 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 990 int i; |
2967 | 991 |
2138 | 992 for(i=0; i<width; i++){ |
993 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
994 } | |
995 } | |
996 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
997 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 998 int y; |
2138 | 999 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
1000 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
1001 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
1002 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
2967 | 1003 |
2138 | 1004 for(y=-4; y<height; y+=2){ |
1005 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
1006 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
1007 | |
1008 {START_TIMER | |
2998 | 1009 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
1010 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
2138 | 1011 if(width>400){ |
1012 STOP_TIMER("horizontal_decompose97i") | |
1013 }} | |
2967 | 1014 |
2138 | 1015 {START_TIMER |
2998 | 1016 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
1017 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
1018 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
1019 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
2138 | 1020 |
1021 if(width>400){ | |
1022 STOP_TIMER("vertical_decompose97i") | |
1023 }} | |
2967 | 1024 |
2138 | 1025 b0=b2; |
1026 b1=b3; | |
1027 b2=b4; | |
1028 b3=b5; | |
1029 } | |
1030 } | |
1031 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
1032 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2138 | 1033 int level; |
2967 | 1034 |
2164 | 1035 for(level=0; level<decomposition_count; level++){ |
1036 switch(type){ | |
3326 | 1037 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1038 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
2138 | 1039 } |
1040 } | |
1041 } | |
1042 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1043 static void horizontal_compose53i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1044 IDWTELEM temp[width]; |
2138 | 1045 const int width2= width>>1; |
1046 const int w2= (width+1)>>1; | |
2893 | 1047 int x; |
2138 | 1048 |
1049 #if 0 | |
2893 | 1050 int A1,A2,A3,A4; |
2138 | 1051 A2= temp[1 ]; |
1052 A4= temp[0 ]; | |
1053 A1= temp[0+width2]; | |
1054 A1 -= (A2 + A4)>>1; | |
1055 A4 += (A1 + 1)>>1; | |
1056 b[0+width2] = A1; | |
1057 b[0 ] = A4; | |
1058 for(x=1; x+1<width2; x+=2){ | |
1059 A3= temp[x+width2]; | |
1060 A4= temp[x+1 ]; | |
1061 A3 -= (A2 + A4)>>1; | |
1062 A2 += (A1 + A3 + 2)>>2; | |
1063 b[x+width2] = A3; | |
1064 b[x ] = A2; | |
1065 | |
1066 A1= temp[x+1+width2]; | |
1067 A2= temp[x+2 ]; | |
1068 A1 -= (A2 + A4)>>1; | |
1069 A4 += (A1 + A3 + 2)>>2; | |
1070 b[x+1+width2] = A1; | |
1071 b[x+1 ] = A4; | |
1072 } | |
1073 A3= temp[width-1]; | |
1074 A3 -= A2; | |
1075 A2 += (A1 + A3 + 2)>>2; | |
1076 b[width -1] = A3; | |
1077 b[width2-1] = A2; | |
2967 | 1078 #else |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1079 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1080 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
5910 | 1081 #endif /* 0 */ |
2138 | 1082 for(x=0; x<width2; x++){ |
1083 b[2*x ]= temp[x ]; | |
1084 b[2*x + 1]= temp[x+w2]; | |
1085 } | |
1086 if(width&1) | |
1087 b[2*x ]= temp[x ]; | |
1088 } | |
1089 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1090 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1091 int i; |
2967 | 1092 |
2138 | 1093 for(i=0; i<width; i++){ |
1094 b1[i] += (b0[i] + b2[i])>>1; | |
1095 } | |
1096 } | |
1097 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1098 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1099 int i; |
2967 | 1100 |
2138 | 1101 for(i=0; i<width; i++){ |
1102 b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1103 } | |
1104 } | |
1105 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1106 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1107 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1108 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1109 cs->y = -1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1110 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1111 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1112 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1113 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1114 cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1115 cs->y = -1; | |
1116 } | |
1117 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1118 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1119 int y= cs->y; |
2967 | 1120 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1121 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1122 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1123 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1124 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1125 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1126 {START_TIMER |
2998 | 1127 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1128 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1129 STOP_TIMER("vertical_compose53i*")} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1130 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1131 {START_TIMER |
2998 | 1132 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1133 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1134 STOP_TIMER("horizontal_compose53i")} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1135 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1136 cs->b0 = b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1137 cs->b1 = b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1138 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1139 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1140 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1141 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1142 int y= cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1143 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1144 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1145 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1146 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
2138 | 1147 |
1148 {START_TIMER | |
2998 | 1149 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1150 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
2138 | 1151 STOP_TIMER("vertical_compose53i*")} |
1152 | |
1153 {START_TIMER | |
2998 | 1154 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1155 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2138 | 1156 STOP_TIMER("horizontal_compose53i")} |
1157 | |
2562 | 1158 cs->b0 = b2; |
1159 cs->b1 = b3; | |
1160 cs->y += 2; | |
1161 } | |
1162 | |
2967 | 1163 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1164 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1165 IDWTELEM temp[width]; |
2138 | 1166 const int w2= (width+1)>>1; |
1167 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1168 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
1169 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1170 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1171 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
2138 | 1172 } |
1173 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1174 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1175 int i; |
2967 | 1176 |
2138 | 1177 for(i=0; i<width; i++){ |
1178 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1179 } | |
1180 } | |
1181 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1182 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1183 int i; |
2967 | 1184 |
2138 | 1185 for(i=0; i<width; i++){ |
1186 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1187 } | |
1188 } | |
1189 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1190 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1191 int i; |
2967 | 1192 |
2138 | 1193 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1194 #ifdef liftS |
2138 | 1195 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1196 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1197 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1198 #endif |
2138 | 1199 } |
1200 } | |
1201 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1202 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1203 int i; |
2967 | 1204 |
2138 | 1205 for(i=0; i<width; i++){ |
1206 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1207 } | |
1208 } | |
1209 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1210 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
2592 | 1211 int i; |
2967 | 1212 |
2592 | 1213 for(i=0; i<width; i++){ |
1214 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
1215 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1216 #ifdef liftS |
2592 | 1217 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1218 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1219 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1220 #endif |
2592 | 1221 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1222 } | |
1223 } | |
1224 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1225 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1226 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1227 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1228 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1229 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1230 cs->y = -3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1231 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1232 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1233 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1234 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1235 cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1236 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1237 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1238 cs->y = -3; | |
1239 } | |
2138 | 1240 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1241 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1242 int y = cs->y; |
2967 | 1243 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1244 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1245 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1246 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1247 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1248 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1249 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
2967 | 1250 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1251 {START_TIMER |
2592 | 1252 if(y>0 && y+4<height){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1253 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
2592 | 1254 }else{ |
2998 | 1255 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1256 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1257 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1258 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2592 | 1259 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1260 if(width>400){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1261 STOP_TIMER("vertical_compose97i")}} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1262 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1263 {START_TIMER |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1264 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1265 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); |
3012 | 1266 if(width>400 && y+0<(unsigned)height){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1267 STOP_TIMER("horizontal_compose97i")}} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1268 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1269 cs->b0=b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1270 cs->b1=b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1271 cs->b2=b4; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1272 cs->b3=b5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1273 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1274 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1275 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1276 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1277 int y = cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1278 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1279 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1280 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1281 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1282 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1283 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
2138 | 1284 |
1285 {START_TIMER | |
2998 | 1286 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1287 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1288 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1289 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2138 | 1290 if(width>400){ |
1291 STOP_TIMER("vertical_compose97i")}} | |
1292 | |
1293 {START_TIMER | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1294 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1295 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); |
2138 | 1296 if(width>400 && b0 <= b2){ |
1297 STOP_TIMER("horizontal_compose97i")}} | |
2562 | 1298 |
1299 cs->b0=b2; | |
1300 cs->b1=b3; | |
1301 cs->b2=b4; | |
1302 cs->b3=b5; | |
1303 cs->y += 2; | |
1304 } | |
1305 | |
3075 | 1306 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1307 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1308 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1309 switch(type){ |
3326 | 1310 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
1311 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1312 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1313 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1314 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1315 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1316 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1317 int level; |
1318 for(level=decomposition_count-1; level>=0; level--){ | |
1319 switch(type){ | |
3326 | 1320 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1321 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
2562 | 1322 } |
1323 } | |
1324 } | |
1325 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1326 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
2562 | 1327 const int support = type==1 ? 3 : 5; |
1328 int level; | |
1329 if(type==2) return; | |
1330 | |
1331 for(level=decomposition_count-1; level>=0; level--){ | |
1332 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1333 switch(type){ | |
3326 | 1334 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
2562 | 1335 break; |
3326 | 1336 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
2562 | 1337 break; |
1338 } | |
1339 } | |
2138 | 1340 } |
1341 } | |
1342 | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1343 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1344 const int support = type==1 ? 3 : 5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1345 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1346 if(type==2) return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1347 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1348 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1349 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1350 switch(type){ |
3326 | 1351 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1352 break; |
3326 | 1353 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1354 break; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1355 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1356 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1357 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1358 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1359 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1360 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1361 dwt_compose_t cs[MAX_DECOMPOSITIONS]; |
1362 int y; | |
1363 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1364 for(y=0; y<height; y+=4) | |
1365 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
2138 | 1366 } |
1367 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1368 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1369 const int w= b->width; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1370 const int h= b->height; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1371 int x, y; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1372 |
2138 | 1373 if(1){ |
1374 int run=0; | |
2149 | 1375 int runs[w*h]; |
2138 | 1376 int run_index=0; |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1377 int max_index; |
2967 | 1378 |
2138 | 1379 for(y=0; y<h; y++){ |
1380 for(x=0; x<w; x++){ | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1381 int v, p=0; |
2144 | 1382 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1383 v= src[x + y*stride]; |
2138 | 1384 |
1385 if(y){ | |
2149 | 1386 t= src[x + (y-1)*stride]; |
2138 | 1387 if(x){ |
2149 | 1388 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1389 } |
1390 if(x + 1 < w){ | |
2149 | 1391 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1392 } |
1393 } | |
1394 if(x){ | |
2149 | 1395 l= src[x - 1 + y*stride]; |
2144 | 1396 /*if(x > 1){ |
1397 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1398 else ll= src[x - 2 + y*stride]; | |
2138 | 1399 }*/ |
1400 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1401 if(parent){ |
2149 | 1402 int px= x>>1; |
1403 int py= y>>1; | |
2967 | 1404 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1405 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1406 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1407 if(!(/*ll|*/l|lt|t|rt|p)){ |
2138 | 1408 if(v){ |
1409 runs[run_index++]= run; | |
1410 run=0; | |
1411 }else{ | |
1412 run++; | |
1413 } | |
1414 } | |
1415 } | |
1416 } | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1417 max_index= run_index; |
2138 | 1418 runs[run_index++]= run; |
1419 run_index=0; | |
1420 run= runs[run_index++]; | |
1421 | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1422 put_symbol2(&s->c, b->state[30], max_index, 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1423 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1424 put_symbol2(&s->c, b->state[1], run, 3); |
2967 | 1425 |
2138 | 1426 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
1427 if(s->c.bytestream_end - s->c.bytestream < w*40){ |
2422 | 1428 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1429 return -1; | |
1430 } | |
2138 | 1431 for(x=0; x<w; x++){ |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1432 int v, p=0; |
2144 | 1433 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1434 v= src[x + y*stride]; |
2138 | 1435 |
1436 if(y){ | |
2149 | 1437 t= src[x + (y-1)*stride]; |
2138 | 1438 if(x){ |
2149 | 1439 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1440 } |
1441 if(x + 1 < w){ | |
2149 | 1442 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1443 } |
1444 } | |
1445 if(x){ | |
2149 | 1446 l= src[x - 1 + y*stride]; |
2144 | 1447 /*if(x > 1){ |
1448 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1449 else ll= src[x - 2 + y*stride]; | |
2138 | 1450 }*/ |
1451 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1452 if(parent){ |
2149 | 1453 int px= x>>1; |
1454 int py= y>>1; | |
2967 | 1455 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1456 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1457 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1458 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1459 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
2144 | 1460 |
2335 | 1461 put_rac(&s->c, &b->state[0][context], !!v); |
2138 | 1462 }else{ |
1463 if(!run){ | |
1464 run= runs[run_index++]; | |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1465 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1466 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1467 put_symbol2(&s->c, b->state[1], run, 3); |
2138 | 1468 assert(v); |
1469 }else{ | |
1470 run--; | |
1471 assert(!v); | |
1472 } | |
1473 } | |
1474 if(v){ | |
4001 | 1475 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
1476 int l2= 2*FFABS(l) + (l<0); | |
1477 int t2= 2*FFABS(t) + (t<0); | |
1478 | |
1479 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
2596 | 1480 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
2138 | 1481 } |
1482 } | |
1483 } | |
1484 } | |
2422 | 1485 return 0; |
2138 | 1486 } |
1487 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1488 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1489 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1490 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
2422 | 1491 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1492 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1493 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1494 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1495 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
2138 | 1496 const int w= b->width; |
1497 const int h= b->height; | |
1498 int x,y; | |
2967 | 1499 |
2138 | 1500 if(1){ |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1501 int run, runs; |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1502 x_and_coeff *xc= b->x_coeff; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1503 x_and_coeff *prev_xc= NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1504 x_and_coeff *prev2_xc= xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1505 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1506 x_and_coeff *prev_parent_xc= parent_xc; |
2138 | 1507 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1508 runs= get_symbol2(&s->c, b->state[30], 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1509 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1510 else run= INT_MAX; |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1511 |
2138 | 1512 for(y=0; y<h; y++){ |
2193 | 1513 int v=0; |
1514 int lt=0, t=0, rt=0; | |
1515 | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1516 if(y && prev_xc->x == 0){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1517 rt= prev_xc->coeff; |
2193 | 1518 } |
2138 | 1519 for(x=0; x<w; x++){ |
2193 | 1520 int p=0; |
1521 const int l= v; | |
2967 | 1522 |
2193 | 1523 lt= t; t= rt; |
1524 | |
2194 | 1525 if(y){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1526 if(prev_xc->x <= x) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1527 prev_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1528 if(prev_xc->x == x + 1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1529 rt= prev_xc->coeff; |
2194 | 1530 else |
1531 rt=0; | |
1532 } | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1533 if(parent_xc){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1534 if(x>>1 > parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1535 parent_xc++; |
2192 | 1536 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1537 if(x>>1 == parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1538 p= parent_xc->coeff; |
2194 | 1539 } |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1540 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1541 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1542 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
2144 | 1543 |
2335 | 1544 v=get_rac(&s->c, &b->state[0][context]); |
2605 | 1545 if(v){ |
1546 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1547 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
2967 | 1548 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1549 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1550 (xc++)->coeff= v; |
2605 | 1551 } |
2138 | 1552 }else{ |
1553 if(!run){ | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1554 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1555 else run= INT_MAX; |
2605 | 1556 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
1557 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
2967 | 1558 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1559 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1560 (xc++)->coeff= v; |
2138 | 1561 }else{ |
2606 | 1562 int max_run; |
2138 | 1563 run--; |
1564 v=0; | |
2191 | 1565 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1566 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
2606 | 1567 else max_run= FFMIN(run, w-x-1); |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1568 if(parent_xc) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1569 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); |
2606 | 1570 x+= max_run; |
1571 run-= max_run; | |
2138 | 1572 } |
1573 } | |
2192 | 1574 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1575 (xc++)->x= w+1; //end marker |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1576 prev_xc= prev2_xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1577 prev2_xc= xc; |
2967 | 1578 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1579 if(parent_xc){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1580 if(y&1){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1581 while(parent_xc->x != parent->width+1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1582 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1583 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1584 prev_parent_xc= parent_xc; |
2192 | 1585 }else{ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1586 parent_xc= prev_parent_xc; |
2138 | 1587 } |
1588 } | |
1589 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1590 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1591 (xc++)->x= w+1; //end marker |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1592 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1593 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1594 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1595 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1596 const int w= b->width; |
2893 | 1597 int y; |
4594 | 1598 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 1599 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1600 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1601 int new_index = 0; |
2967 | 1602 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1603 START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1604 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1605 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1606 qadd= 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1607 qmul= 1<<QEXPSHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1608 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1609 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1610 /* If we are on the second or later slice, restore our index. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1611 if (start_y != 0) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1612 new_index = save_state[0]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1613 |
2967 | 1614 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1615 for(y=start_y; y<h; y++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1616 int x = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1617 int v; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1618 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1619 memset(line, 0, b->width*sizeof(IDWTELEM)); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1620 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1621 x = b->x_coeff[new_index++].x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1622 while(x < w) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1623 { |
2596 | 1624 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1625 register int u= -(v&1); | |
1626 line[x] = (t^u) - u; | |
1627 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1628 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1629 x = b->x_coeff[new_index++].x; |
2138 | 1630 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1631 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1632 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1633 STOP_TIMER("decode_subband") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1634 } |
2967 | 1635 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1636 /* Save our variables for the next slice. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1637 save_state[0] = new_index; |
2967 | 1638 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1639 return; |
2138 | 1640 } |
1641 | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
1642 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts |
2138 | 1643 int plane_index, level, orientation; |
1644 | |
2199 | 1645 for(plane_index=0; plane_index<3; plane_index++){ |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
1646 for(level=0; level<MAX_DECOMPOSITIONS; level++){ |
2138 | 1647 for(orientation=level ? 1:0; orientation<4; orientation++){ |
2335 | 1648 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
2138 | 1649 } |
1650 } | |
1651 } | |
2335 | 1652 memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1653 memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1654 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1655 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1656 static int alloc_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1657 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1658 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); |
2967 | 1659 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1660 s->b_width = w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1661 s->b_height= h; |
2967 | 1662 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1663 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1664 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1665 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1666 |
2335 | 1667 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1668 uint8_t *bytestream= d->bytestream; | |
1669 uint8_t *bytestream_start= d->bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1670 *d= *s; |
2335 | 1671 d->bytestream= bytestream; |
1672 d->bytestream_start= bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1673 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1674 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1675 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
/**
 * Sums the pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square (only read)
 * @param line_size distance between the starts of two consecutive rows
 * @param w         width and height of the square
 * @return sum of the w*w pixel values, 0 if w <= 0
 */
static int pix_sum(const uint8_t * pix, int line_size, int w)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        for (col = 0; col < w; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1690 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1691 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1692 static int pix_norm1(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1693 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1694 int s, i, j; |
4179 | 1695 uint32_t *sq = ff_squareTbl + 256; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1696 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1697 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1698 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1699 for (j = 0; j < w; j ++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1700 s += sq[pix[0]]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1701 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1702 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1703 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1704 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1705 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1706 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1707 |
3314 | 1708 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1709 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1710 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1711 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1712 const int block_w= 1<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1713 BlockNode block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1714 int i,j; |
2967 | 1715 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1716 block.color[0]= l; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1717 block.color[1]= cb; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1718 block.color[2]= cr; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1719 block.mx= mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1720 block.my= my; |
3314 | 1721 block.ref= ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1722 block.type= type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1723 block.level= level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1724 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1725 for(j=0; j<block_w; j++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1726 for(i=0; i<block_w; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1727 s->block[index + i + j*w]= block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1728 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1729 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1730 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1731 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1732 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1733 const int offset[3]= { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1734 y*c-> stride + x, |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1735 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1736 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1737 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1738 int i; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1739 for(i=0; i<3; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1740 c->src[0][i]= src [i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1741 c->ref[0][i]= ref [i] + offset[i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1742 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1743 assert(!ref_index); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1744 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1745 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1746 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1747 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1748 if(s->ref_frames == 1){ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1749 *mx = mid_pred(left->mx, top->mx, tr->mx); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1750 *my = mid_pred(left->my, top->my, tr->my); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1751 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1752 const int *scale = scale_mv_ref[ref]; |
4407
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1753 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1754 (top ->mx * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1755 (tr ->mx * scale[tr ->ref] + 128) >>8); |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1756 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1757 (top ->my * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1758 (tr ->my * scale[tr ->ref] + 128) >>8); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1759 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1760 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1761 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1762 //FIXME copy&paste |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1763 #define P_LEFT P[1] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1764 #define P_TOP P[2] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1765 #define P_TOPRIGHT P[3] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1766 #define P_MEDIAN P[4] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1767 #define P_MV1 P[9] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1768 #define FLAG_QPEL 1 //must be 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1769 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1770 static int encode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1771 uint8_t p_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1772 uint8_t i_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1773 uint8_t p_state[sizeof(s->block_state)]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1774 uint8_t i_state[sizeof(s->block_state)]; |
2335 | 1775 RangeCoder pc, ic; |
1776 uint8_t *pbbak= s->c.bytestream; | |
1777 uint8_t *pbbak_start= s->c.bytestream_start; | |
5082 | 1778 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1779 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1780 const int h= s->b_height << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1781 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1782 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1783 const int block_w= 1<<(LOG2_MB_SIZE - level); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1784 int trx= (x+1)<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1785 int try= (y+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1786 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1787 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1788 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1789 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1790 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1791 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1792 int pl = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1793 int pcb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1794 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1795 int pmx, pmy; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1796 int mx=0, my=0; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1797 int l,cr,cb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1798 const int stride= s->current_picture.linesize[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1799 const int uvstride= s->current_picture.linesize[1]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1800 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1801 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1802 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1803 int P[10][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1804 int16_t last_mv[3][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1805 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1806 const int shift= 1+qpel; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1807 MotionEstContext *c= &s->m.me; |
3314 | 1808 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1809 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
1810 int my_context= av_log2(2*FFABS(left->my - top->my)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1811 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
3314 | 1812 int ref, best_ref, ref_score, ref_mx, ref_my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1813 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1814 assert(sizeof(s->block_state) >= 256); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1815 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1816 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1817 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1818 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1819 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1820 // clip predictors / edge ? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1821 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1822 P_LEFT[0]= left->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1823 P_LEFT[1]= left->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1824 P_TOP [0]= top->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1825 P_TOP [1]= top->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1826 P_TOPRIGHT[0]= tr->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1827 P_TOPRIGHT[1]= tr->my; |
2967 | 1828 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1829 last_mv[0][0]= s->block[index].mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1830 last_mv[0][1]= s->block[index].my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1831 last_mv[1][0]= right->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1832 last_mv[1][1]= right->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1833 last_mv[2][0]= bottom->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1834 last_mv[2][1]= bottom->my; |
2967 | 1835 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1836 s->m.mb_stride=2; |
2967 | 1837 s->m.mb_x= |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1838 s->m.mb_y= 0; |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1839 c->skip= 0; |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1840 |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1841 assert(c-> stride == stride); |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1842 assert(c->uvstride == uvstride); |
2967 | 1843 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1844 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1845 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1846 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1847 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; |
2967 | 1848 |
2206 | 1849 c->xmin = - x*block_w - 16+2; |
1850 c->ymin = - y*block_w - 16+2; | |
1851 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
1852 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1853 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1854 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); |
2967 | 1855 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1856 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1857 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1858 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1859 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1860 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1861 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1862 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1863 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1864 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1865 if (!y) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1866 c->pred_x= P_LEFT[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1867 c->pred_y= P_LEFT[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1868 } else { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1869 c->pred_x = P_MEDIAN[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1870 c->pred_y = P_MEDIAN[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1871 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1872 |
3314 | 1873 score= INT_MAX; |
1874 best_ref= 0; | |
1875 for(ref=0; ref<s->ref_frames; ref++){ | |
1876 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); | |
1877 | |
1878 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, | |
1879 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
1880 | |
1881 assert(ref_mx >= c->xmin); | |
1882 assert(ref_mx <= c->xmax); | |
1883 assert(ref_my >= c->ymin); | |
1884 assert(ref_my <= c->ymax); | |
1885 | |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1886 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
3314 | 1887 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); |
1888 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; | |
1889 if(s->ref_mvs[ref]){ | |
1890 s->ref_mvs[ref][index][0]= ref_mx; | |
1891 s->ref_mvs[ref][index][1]= ref_my; | |
1892 s->ref_scores[ref][index]= ref_score; | |
1893 } | |
1894 if(score > ref_score){ | |
1895 score= ref_score; | |
1896 best_ref= ref; | |
1897 mx= ref_mx; | |
1898 my= ref_my; | |
1899 } | |
1900 } | |
5127 | 1901 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 |
2967 | 1902 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1903 // subpel search |
5085 | 1904 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1905 pc= s->c; |
2335 | 1906 pc.bytestream_start= |
1907 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1908 memcpy(p_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1909 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1910 if(level!=s->block_max_depth) |
2335 | 1911 put_rac(&pc, &p_state[4 + s_context], 1); |
1912 put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
3314 | 1913 if(s->ref_frames > 1) |
1914 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1915 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); |
3314 | 1916 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); |
1917 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); | |
2335 | 1918 p_len= pc.bytestream - pc.bytestream_start; |
5082 | 1919 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1920 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1921 block_s= block_w*block_w; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1922 sum = pix_sum(current_data[0], stride, block_w); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1923 l= (sum + block_s/2)/block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1924 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
2967 | 1925 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1926 block_s= block_w*block_w>>2; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1927 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1928 cb= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1929 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1930 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1931 cr= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1932 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1933 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1934 ic= s->c; |
2335 | 1935 ic.bytestream_start= |
1936 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1937 memcpy(i_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1938 if(level!=s->block_max_depth) |
2335 | 1939 put_rac(&ic, &i_state[4 + s_context], 1); |
1940 put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1941 put_symbol(&ic, &i_state[32], l-pl , 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1942 put_symbol(&ic, &i_state[64], cb-pcb, 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1943 put_symbol(&ic, &i_state[96], cr-pcr, 1); |
2335 | 1944 i_len= ic.bytestream - ic.bytestream_start; |
5082 | 1945 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1946 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1947 // assert(score==256*256*256*64-1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1948 assert(iscore < 255*255*256 + s->lambda2*10); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1949 assert(iscore >= 0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1950 assert(l>=0 && l<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1951 assert(pl>=0 && pl<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1952 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1953 if(level==0){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1954 int varc= iscore >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1955 int vard= score >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1956 if (vard <= 64 || vard < varc) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1957 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1958 else |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1959 c->scene_change_score+= s->m.qscale; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1960 } |
2967 | 1961 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1962 if(level!=s->block_max_depth){ |
2335 | 1963 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1964 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1965 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1966 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1967 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1968 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead |
2967 | 1969 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1970 if(score2 < score && score2 < iscore) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1971 return score2; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1972 } |
2967 | 1973 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1974 if(iscore < score){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1975 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2335 | 1976 memcpy(pbbak, i_buffer, i_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1977 s->c= ic; |
2335 | 1978 s->c.bytestream_start= pbbak_start; |
1979 s->c.bytestream= pbbak + i_len; | |
3314 | 1980 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1981 memcpy(s->block_state, i_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1982 return iscore; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1983 }else{ |
2335 | 1984 memcpy(pbbak, p_buffer, p_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1985 s->c= pc; |
2335 | 1986 s->c.bytestream_start= pbbak_start; |
1987 s->c.bytestream= pbbak + p_len; | |
3314 | 1988 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1989 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1990 return score; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1991 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1992 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1993 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
1994 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1995 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1996 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1997 }else{ |
3314 | 1998 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1999 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2000 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2001 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2002 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2003 const int w= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2004 const int rem_depth= s->block_max_depth - level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2005 const int index= (x + y*w) << rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2006 int trx= (x+1)<<rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2007 BlockNode *b= &s->block[index]; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2008 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2009 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2010 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2011 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2012 int pl = left->color[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2013 int pcb= left->color[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2014 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2015 int pmx, pmy; |
3314 | 2016 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 2017 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; |
2018 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2019 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2020 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2021 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2022 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2023 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2024 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2025 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2026 if(level!=s->block_max_depth){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2027 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ |
2995
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
2028 put_rac(&s->c, &s->block_state[4 + s_context], 1); |
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
2029 }else{ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2030 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2031 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2032 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2033 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2034 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2035 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2036 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2037 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2038 if(b->type & BLOCK_INTRA){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2039 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2040 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2041 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2042 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2043 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); |
3314 | 2044 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2045 }else{ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2046 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2047 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); |
3314 | 2048 if(s->ref_frames > 1) |
2049 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2050 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2051 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
3314 | 2052 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2053 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2054 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2055 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2056 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2057 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2058 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2059 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2060 int trx= (x+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2061 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2062 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2063 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2064 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2065 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
2967 | 2066 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2067 if(s->keyframe){ |
3314 | 2068 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2069 return; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2070 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2071 |
2335 | 2072 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
4332 | 2073 int type, mx, my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2074 int l = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2075 int cb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2076 int cr= left->color[2]; |
3314 | 2077 int ref = 0; |
2078 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
4001 | 2079 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); |
2080 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
2967 | 2081 |
2335 | 2082 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2083 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2084 if(type){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2085 pred_mv(s, &mx, &my, 0, left, top, tr); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2086 l += get_symbol(&s->c, &s->block_state[32], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2087 cb+= get_symbol(&s->c, &s->block_state[64], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2088 cr+= get_symbol(&s->c, &s->block_state[96], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2089 }else{ |
3314 | 2090 if(s->ref_frames > 1) |
2091 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2092 pred_mv(s, &mx, &my, ref, left, top, tr); |
3314 | 2093 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); |
2094 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2095 } |
3314 | 2096 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2097 }else{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2098 decode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2099 decode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2100 decode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2101 decode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2102 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2103 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2104 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2105 static void encode_blocks(SnowContext *s, int search){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2106 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2107 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2108 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2109 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2110 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2111 iterative_me(s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2112 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2113 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
2114 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
2422 | 2115 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2116 return; | |
2117 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2118 for(x=0; x<w; x++){ |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2119 if(s->avctx->me_method == ME_ITER || !search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2120 encode_q_branch2(s, 0, x, y); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2121 else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2122 encode_q_branch (s, 0, x, y); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2123 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2124 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2125 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2126 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2127 static void decode_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2128 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2129 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2130 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2131 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2132 for(y=0; y<h; y++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2133 for(x=0; x<w; x++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2134 decode_q_branch(s, 0, x, y); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2135 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2136 } |
2138 | 2137 } |
2138 | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2139 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
5648 | 2140 const static uint8_t weight[64]={ |
2141 8,7,6,5,4,3,2,1, | |
2142 7,7,0,0,0,0,0,1, | |
2143 6,0,6,0,0,0,2,0, | |
2144 5,0,0,5,0,3,0,0, | |
2145 4,0,0,0,4,0,0,0, | |
2146 3,0,0,5,0,3,0,0, | |
2147 2,0,6,0,0,0,2,0, | |
2148 1,7,0,0,0,0,0,1, | |
2149 }; | |
2150 | |
2151 const static uint8_t brane[256]={ | |
2152 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, | |
2153 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
2154 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
2155 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
2156 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
2157 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
2158 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
2159 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
2160 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
2161 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
2162 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
2163 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
2164 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
2165 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
2166 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
2167 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
2168 }; | |
2169 | |
2170 const static uint8_t needs[16]={ | |
2171 0,1,0,0, | |
2172 2,4,2,0, | |
2173 0,1,0,0, | |
2174 15 | |
2175 }; | |
2176 | |
2177 int x, y, b, r, l; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2178 int16_t tmpIt [64*(32+HTAPS_MAX)]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2179 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; |
5648 | 2180 int16_t *tmpI= tmpIt; |
2181 uint8_t *tmp2= tmp2t[0]; | |
2182 uint8_t *hpel[11]; | |
2221 | 2183 START_TIMER |
5648 | 2184 assert(dx<16 && dy<16); |
2185 r= brane[dx + 16*dy]&15; | |
2186 l= brane[dx + 16*dy]>>4; | |
2187 | |
2188 b= needs[l] | needs[r]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2189 if(p && !p->diag_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2190 b= 15; |
5648 | 2191 |
2192 if(b&5){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2193 for(y=0; y < b_h+HTAPS_MAX-1; y++){ |
5649 | 2194 for(x=0; x < b_w; x++){ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2195 int a_1=src[x + HTAPS_MAX/2-4]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2196 int a0= src[x + HTAPS_MAX/2-3]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2197 int a1= src[x + HTAPS_MAX/2-2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2198 int a2= src[x + HTAPS_MAX/2-1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2199 int a3= src[x + HTAPS_MAX/2+0]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2200 int a4= src[x + HTAPS_MAX/2+1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2201 int a5= src[x + HTAPS_MAX/2+2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2202 int a6= src[x + HTAPS_MAX/2+3]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2203 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2204 if(!p || p->fast_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2205 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2206 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2207 am= (am+16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2208 }else{ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2209 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2210 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2211 am= (am+32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2212 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2213 |
5649 | 2214 if(am&(~255)) am= ~(am>>31); |
2215 tmp2[x]= am; | |
2216 } | |
2217 tmpI+= 64; | |
2218 tmp2+= stride; | |
2219 src += stride; | |
2138 | 2220 } |
5649 | 2221 src -= stride*y; |
5648 | 2222 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2223 src += HTAPS_MAX/2 - 1; |
5648 | 2224 tmp2= tmp2t[1]; |
2225 | |
2226 if(b&2){ | |
5649 | 2227 for(y=0; y < b_h; y++){ |
2228 for(x=0; x < b_w+1; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2229 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2230 int a0= src[x + (HTAPS_MAX/2-3)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2231 int a1= src[x + (HTAPS_MAX/2-2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2232 int a2= src[x + (HTAPS_MAX/2-1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2233 int a3= src[x + (HTAPS_MAX/2+0)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2234 int a4= src[x + (HTAPS_MAX/2+1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2235 int a5= src[x + (HTAPS_MAX/2+2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2236 int a6= src[x + (HTAPS_MAX/2+3)*stride]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2237 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2238 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2239 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2240 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2241 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2242 |
5649 | 2243 if(am&(~255)) am= ~(am>>31); |
2244 tmp2[x]= am; | |
2245 } | |
2246 src += stride; | |
2247 tmp2+= stride; | |
5648 | 2248 } |
5649 | 2249 src -= stride*y; |
5648 | 2250 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2251 src += stride*(HTAPS_MAX/2 - 1); |
5648 | 2252 tmp2= tmp2t[2]; |
2253 tmpI= tmpIt; | |
2254 if(b&4){ | |
2255 for(y=0; y < b_h; y++){ | |
2256 for(x=0; x < b_w; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2257 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2258 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2259 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2260 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2261 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2262 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2263 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2264 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2265 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2266 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2267 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2268 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2269 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; |
5648 | 2270 if(am&(~255)) am= ~(am>>31); |
2271 tmp2[x]= am; | |
2272 } | |
2273 tmpI+= 64; | |
2274 tmp2+= stride; | |
2138 | 2275 } |
5648 | 2276 } |
2277 | |
2278 hpel[ 0]= src; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2279 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); |
5648 | 2280 hpel[ 2]= src + 1; |
2281 | |
2282 hpel[ 4]= tmp2t[1]; | |
2283 hpel[ 5]= tmp2t[2]; | |
2284 hpel[ 6]= tmp2t[1] + 1; | |
2285 | |
2286 hpel[ 8]= src + stride; | |
2287 hpel[ 9]= hpel[1] + stride; | |
2288 hpel[10]= hpel[8] + 1; | |
2289 | |
2290 if(b==15){ | |
2291 uint8_t *src1= hpel[dx/8 + dy/8*4 ]; | |
2292 uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
2293 uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
2294 uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
2295 dx&=7; | |
2296 dy&=7; | |
2297 for(y=0; y < b_h; y++){ | |
2298 for(x=0; x < b_w; x++){ | |
2299 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
2300 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
2301 } | |
2302 src1+=stride; | |
2303 src2+=stride; | |
2304 src3+=stride; | |
2305 src4+=stride; | |
2306 dst +=stride; | |
2307 } | |
2308 }else{ | |
2309 uint8_t *src1= hpel[l]; | |
2310 uint8_t *src2= hpel[r]; | |
2311 int a= weight[((dx&7) + (8*(dy&7)))]; | |
2312 int b= 8-a; | |
2313 for(y=0; y < b_h; y++){ | |
2314 for(x=0; x < b_w; x++){ | |
2315 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
2316 } | |
2317 src1+=stride; | |
2318 src2+=stride; | |
2319 dst +=stride; | |
2320 } | |
2138 | 2321 } |
2221 | 2322 STOP_TIMER("mc_block") |
2138 | 2323 } |
2324 | |
2325 #define mca(dx,dy,b_w)\ | |
5254 | 2326 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2327 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ |
2138 | 2328 assert(h==b_w);\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2329 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ |
2138 | 2330 } |
2331 | |
2332 mca( 0, 0,16) | |
2333 mca( 8, 0,16) | |
2334 mca( 0, 8,16) | |
2335 mca( 8, 8,16) | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2336 mca( 0, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2337 mca( 8, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2338 mca( 0, 8,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2339 mca( 8, 8,8) |
2138 | 2340 |
3314 | 2341 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2342 if(block->type & BLOCK_INTRA){ |
2206 | 2343 int x, y; |
3018 | 2344 const int color = block->color[plane_index]; |
2345 const int color4= color*0x01010101; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2346 if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2347 for(y=0; y < b_h; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2348 *(uint32_t*)&dst[0 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2349 *(uint32_t*)&dst[4 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2350 *(uint32_t*)&dst[8 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2351 *(uint32_t*)&dst[12+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2352 *(uint32_t*)&dst[16+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2353 *(uint32_t*)&dst[20+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2354 *(uint32_t*)&dst[24+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2355 *(uint32_t*)&dst[28+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2356 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2357 }else if(b_w==16){ |
3018 | 2358 for(y=0; y < b_h; y++){ |
2359 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2360 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2361 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2362 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2363 } | |
2364 }else if(b_w==8){ | |
2365 for(y=0; y < b_h; y++){ | |
2366 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2367 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2368 } | |
2369 }else if(b_w==4){ | |
2370 for(y=0; y < b_h; y++){ | |
2371 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2372 } | |
2373 }else{ | |
2374 for(y=0; y < b_h; y++){ | |
2375 for(x=0; x < b_w; x++){ | |
2376 dst[x + y*stride]= color; | |
2377 } | |
2138 | 2378 } |
2379 } | |
2380 }else{ | |
3314 | 2381 uint8_t *src= s->last_picture[block->ref].data[plane_index]; |
2206 | 2382 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; |
2383 int mx= block->mx*scale; | |
2384 int my= block->my*scale; | |
2223 | 2385 const int dx= mx&15; |
2386 const int dy= my&15; | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2387 const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2388 sx += (mx>>4) - (HTAPS_MAX/2-1); |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2389 sy += (my>>4) - (HTAPS_MAX/2-1); |
2206 | 2390 src += sx + sy*stride; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2391 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2392 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2393 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); |
2206 | 2394 src= tmp + MB_SIZE; |
2138 | 2395 } |
3189 | 2396 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2397 // assert(!(b_w&(b_w-1))); | |
3018 | 2398 assert(b_w>1 && b_h>1); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2399 assert(tab_index>=0 && tab_index<4 || b_w==32); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2400 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2401 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2402 else if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2403 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2404 for(y=0; y<b_h; y+=16){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2405 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2406 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2407 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2408 }else if(b_w==b_h) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2409 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); |
3018 | 2410 else if(b_w==2*b_h){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2411 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2412 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); |
3018 | 2413 }else{ |
2414 assert(2*b_w==b_h); | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2415 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2416 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); |
3018 | 2417 } |
2138 | 2418 } |
2419 } | |
2420 | |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2421 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2422 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2423 int y, x; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2424 IDWTELEM * dst; |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2425 for(y=0; y<b_h; y++){ |
5409 | 2426 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2427 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2428 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2429 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2430 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2431 dst = slice_buffer_get_line(sb, src_y + y); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2432 for(x=0; x<b_w; x++){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2433 int v= obmc1[x] * block[3][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2434 +obmc2[x] * block[2][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2435 +obmc3[x] * block[1][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2436 +obmc4[x] * block[0][x + y*src_stride]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2437 |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2438 v <<= 8 - LOG2_OBMC_MAX; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2439 if(FRAC_BITS != 8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2440 v >>= 8 - FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2441 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2442 if(add){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2443 v += dst[x + src_x]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2444 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2445 if(v&(~255)) v= ~(v>>31); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2446 dst8[x + y*src_stride] = v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2447 }else{ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2448 dst[x + src_x] -= v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2449 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2450 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2451 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2452 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2453 |
2206 | 2454 //FIXME name cleanup (b_w, block_w, b_width stuff) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2455 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2456 const int b_width = s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2457 const int b_height= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2458 const int b_stride= b_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2459 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2460 BlockNode *rt= lt+1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2461 BlockNode *lb= lt+b_stride; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2462 BlockNode *rb= lb+1; |
2967 | 2463 uint8_t *block[4]; |
2842 | 2464 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
2465 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align | |
2466 uint8_t *ptmp; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2467 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2468 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2469 if(b_x<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2470 lt= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2471 lb= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2472 }else if(b_x + 1 >= b_width){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2473 rt= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2474 rb= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2475 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2476 if(b_y<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2477 lt= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2478 rt= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2479 }else if(b_y + 1 >= b_height){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2480 lb= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2481 rb= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2482 } |
2967 | 2483 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2484 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2485 obmc -= src_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2486 b_w += src_x; |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2487 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2488 dst -= src_x; |
2206 | 2489 src_x=0; |
2490 }else if(src_x + b_w > w){ | |
2491 b_w = w - src_x; | |
2492 } | |
2493 if(src_y<0){ | |
2494 obmc -= src_y*obmc_stride; | |
2495 b_h += src_y; | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2496 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2497 dst -= src_y*dst_stride; |
2206 | 2498 src_y=0; |
2499 }else if(src_y + b_h> h){ | |
2500 b_h = h - src_y; | |
2501 } | |
2967 | 2502 |
2206 | 2503 if(b_w<=0 || b_h<=0) return; |
2504 | |
2842 | 2505 assert(src_stride > 2*MB_SIZE + 5); |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2506 if(!sliced && offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2507 dst += src_x + src_y*dst_stride; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2508 dst8+= src_x + src_y*src_stride; |
2206 | 2509 // src += src_x + src_y*src_stride; |
2510 | |
2842 | 2511 ptmp= tmp + 3*tmp_step; |
2512 block[0]= ptmp; | |
2513 ptmp+=tmp_step; | |
3314 | 2514 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
2206 | 2515 |
2516 if(same_block(lt, rt)){ | |
2517 block[1]= block[0]; | |
2518 }else{ | |
2842 | 2519 block[1]= ptmp; |
2520 ptmp+=tmp_step; | |
3314 | 2521 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2206 | 2522 } |
2967 | 2523 |
2206 | 2524 if(same_block(lt, lb)){ |
2525 block[2]= block[0]; | |
2526 }else if(same_block(rt, lb)){ | |
2527 block[2]= block[1]; | |
2528 }else{ | |
2842 | 2529 block[2]= ptmp; |
2530 ptmp+=tmp_step; | |
3314 | 2531 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2206 | 2532 } |
2533 | |
2534 if(same_block(lt, rb) ){ | |
2535 block[3]= block[0]; | |
2536 }else if(same_block(rt, rb)){ | |
2537 block[3]= block[1]; | |
2538 }else if(same_block(lb, rb)){ | |
2539 block[3]= block[2]; | |
2540 }else{ | |
2842 | 2541 block[3]= ptmp; |
3314 | 2542 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2206 | 2543 } |
2544 #if 0 | |
2545 for(y=0; y<b_h; y++){ | |
2546 for(x=0; x<b_w; x++){ | |
2547 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2548 if(add) dst[x + y*dst_stride] += v; | |
2549 else dst[x + y*dst_stride] -= v; | |
2550 } | |
2551 } | |
2552 for(y=0; y<b_h; y++){ | |
2553 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2554 for(x=0; x<b_w; x++){ | |
2555 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2556 if(add) dst[x + y*dst_stride] += v; | |
2557 else dst[x + y*dst_stride] -= v; | |
2558 } | |
2559 } | |
2560 for(y=0; y<b_h; y++){ | |
2561 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2562 for(x=0; x<b_w; x++){ | |
2563 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2564 if(add) dst[x + y*dst_stride] += v; | |
2565 else dst[x + y*dst_stride] -= v; | |
2566 } | |
2567 } | |
2568 for(y=0; y<b_h; y++){ | |
2569 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2570 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2571 for(x=0; x<b_w; x++){ | |
2572 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2573 if(add) dst[x + y*dst_stride] += v; | |
2574 else dst[x + y*dst_stride] -= v; | |
2575 } | |
2576 } | |
2577 #else | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2578 if(sliced){ |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2579 START_TIMER |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2580 |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2581 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2582 STOP_TIMER("inner_add_yblock") |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2583 }else |
2206 | 2584 for(y=0; y<b_h; y++){ |
5409 | 2585 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2586 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2587 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2588 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2589 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
2206 | 2590 for(x=0; x<b_w; x++){ |
2591 int v= obmc1[x] * block[3][x + y*src_stride] | |
2592 +obmc2[x] * block[2][x + y*src_stride] | |
2593 +obmc3[x] * block[1][x + y*src_stride] | |
2594 +obmc4[x] * block[0][x + y*src_stride]; | |
2967 | 2595 |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2596 v <<= 8 - LOG2_OBMC_MAX; |
2246 | 2597 if(FRAC_BITS != 8){ |
2598 v >>= 8 - FRAC_BITS; | |
2599 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2600 if(add){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2601 v += dst[x + y*dst_stride]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2602 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2603 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2604 dst8[x + y*src_stride] = v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2605 }else{ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2606 dst[x + y*dst_stride] -= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2607 } |
2206 | 2608 } |
2609 } | |
5910 | 2610 #endif /* 0 */ |
2206 | 2611 } |
2612 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2613 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2614 Plane *p= &s->plane[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2615 const int mb_w= s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2616 const int mb_h= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2617 int x, y, mb_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2618 int block_size = MB_SIZE >> s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2619 int block_w = plane_index ? block_size/2 : block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2620 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2621 int obmc_stride= plane_index ? block_size : 2*block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2622 int ref_stride= s->current_picture.linesize[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2623 uint8_t *dst8= s->current_picture.data[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2624 int w= p->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2625 int h= p->height; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2626 START_TIMER |
2967 | 2627 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2628 if(s->keyframe || (s->avctx->debug&512)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2629 if(mb_y==mb_h) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2630 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2631 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2632 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2633 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2634 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2635 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2636 IDWTELEM * line = sb->line[y]; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2637 for(x=0; x<w; x++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2638 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2639 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2640 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2641 v >>= FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2642 if(v&(~255)) v= ~(v>>31); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2643 dst8[x + y*ref_stride]= v; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2644 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2645 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2646 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2648 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2649 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2650 IDWTELEM * line = sb->line[y]; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2651 for(x=0; x<w; x++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2652 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2653 line[x] -= 128 << FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2654 // buf[x + y*w]-= 128<<FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2655 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2656 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2657 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2658 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2659 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2660 } |
2967 | 2661 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2662 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2663 START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2664 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2665 add_yblock(s, 1, sb, old_buffer, dst8, obmc, |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2666 block_w*mb_x - block_w/2, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2667 block_w*mb_y - block_w/2, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2668 block_w, block_w, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2669 w, h, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2670 w, ref_stride, obmc_stride, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2671 mb_x - 1, mb_y - 1, |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2672 add, 0, plane_index); |
2967 | 2673 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2674 STOP_TIMER("add_yblock") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2675 } |
2967 | 2676 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2677 STOP_TIMER("predict_slice") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2678 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2679 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2680 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2138 | 2681 Plane *p= &s->plane[plane_index]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2682 const int mb_w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2683 const int mb_h= s->b_height << s->block_max_depth; |
2562 | 2684 int x, y, mb_x; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2685 int block_size = MB_SIZE >> s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2686 int block_w = plane_index ? block_size/2 : block_size; |
2206 | 2687 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2688 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2689 int ref_stride= s->current_picture.linesize[plane_index]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2690 uint8_t *dst8= s->current_picture.data[plane_index]; |
2138 | 2691 int w= p->width; |
2692 int h= p->height; | |
2197 | 2693 START_TIMER |
2967 | 2694 |
2206 | 2695 if(s->keyframe || (s->avctx->debug&512)){ |
2562 | 2696 if(mb_y==mb_h) |
2697 return; | |
2698 | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2699 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2700 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2701 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2702 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2703 v >>= FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2704 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2705 dst8[x + y*ref_stride]= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2706 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2707 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2708 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2709 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2710 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2711 buf[x + y*w]-= 128<<FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2712 } |
2206 | 2713 } |
2138 | 2714 } |
2206 | 2715 |
2716 return; | |
2138 | 2717 } |
2967 | 2718 |
2206 | 2719 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2197 | 2720 START_TIMER |
2206 | 2721 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2722 add_yblock(s, 0, NULL, buf, dst8, obmc, |
2206 | 2723 block_w*mb_x - block_w/2, |
2138 | 2724 block_w*mb_y - block_w/2, |
2206 | 2725 block_w, block_w, |
2138 | 2726 w, h, |
2206 | 2727 w, ref_stride, obmc_stride, |
2728 mb_x - 1, mb_y - 1, | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2729 add, 1, plane_index); |
2967 | 2730 |
2206 | 2731 STOP_TIMER("add_yblock") |
2138 | 2732 } |
2967 | 2733 |
2562 | 2734 STOP_TIMER("predict_slice") |
2735 } | |
2736 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2737 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2562 | 2738 const int mb_h= s->b_height << s->block_max_depth; |
2739 int mb_y; | |
2740 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2741 predict_slice(s, buf, plane_index, add, mb_y); | |
2138 | 2742 } |
2743 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2744 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2745 int i, x2, y2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2746 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2747 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2748 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2749 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2750 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2751 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2752 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2753 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2754 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2755 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2756 const int h= p->height; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2757 int index= mb_x + mb_y*b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2758 BlockNode *b= &s->block[index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2759 BlockNode backup= *b; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2760 int ab=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2761 int aa=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2762 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2763 b->type|= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2764 b->color[plane_index]= 0; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2765 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2766 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2767 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2768 int mb_x2= mb_x + (i &1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2769 int mb_y2= mb_y + (i>>1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2770 int x= block_w*mb_x2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2771 int y= block_w*mb_y2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2772 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2773 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2774 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2775 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2776 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2777 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2778 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2779 int obmc_v= obmc[index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2780 int d; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2781 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2782 if(x<0) obmc_v += obmc[index + block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2783 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2784 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2785 //FIXME precalc this or simplify it somehow else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2786 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2787 d = -dst[index] + (1<<(FRAC_BITS-1)); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2788 dst[index] = d; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2789 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2790 aa += obmc_v * obmc_v; //FIXME precalclate this |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2791 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2792 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2793 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2794 *b= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2795 |
5127 | 2796 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2797 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2798 |
3051 | 2799 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
2800 const int b_stride = s->b_width << s->block_max_depth; | |
2801 const int b_height = s->b_height<< s->block_max_depth; | |
2802 int index= x + y*b_stride; | |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2803 const BlockNode *b = &s->block[index]; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2804 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2805 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2806 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2807 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; |
3051 | 2808 int dmx, dmy; |
4001 | 2809 // int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
2810 // int my_context= av_log2(2*FFABS(left->my - top->my)); | |
3051 | 2811 |
2812 if(x<0 || x>=b_stride || y>=b_height) | |
2813 return 0; | |
2814 /* | |
2815 1 0 0 | |
2816 01X 1-2 1 | |
2817 001XX 3-6 2-3 | |
2818 0001XXX 7-14 4-7 | |
2819 00001XXXX 15-30 8-15 | |
2820 */ | |
2821 //FIXME try accurate rate | |
2822 //FIXME intra and inter predictors if surrounding blocks arent the same type | |
2823 if(b->type & BLOCK_INTRA){ | |
4001 | 2824 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) |
2825 + av_log2(2*FFABS(left->color[1] - b->color[1])) | |
2826 + av_log2(2*FFABS(left->color[2] - b->color[2]))); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2827 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2828 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2829 dmx-= b->mx; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2830 dmy-= b->my; |
4001 | 2831 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda |
2832 + av_log2(2*FFABS(dmy)) | |
3314 | 2833 + av_log2(2*b->ref)); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2834 } |
3051 | 2835 } |
2836 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2837 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2838 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2839 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2840 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2841 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2842 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2843 uint8_t *dst= s->current_picture.data[plane_index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2844 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2845 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2846 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2847 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2848 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2849 const int b_height = s->b_height<< s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2850 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2851 const int h= p->height; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2852 int distortion; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2853 int rate= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2854 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2855 int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2856 int sy= block_w*mb_y - block_w/2; |
3206 | 2857 int x0= FFMAX(0,-sx); |
2858 int y0= FFMAX(0,-sy); | |
2859 int x1= FFMIN(block_w*2, w-sx); | |
2860 int y1= FFMIN(block_w*2, h-sy); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2861 int i,x,y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2862 |
3314 | 2863 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2864 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2865 for(y=y0; y<y1; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2866 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2867 const IDWTELEM *pred1 = pred + y*obmc_stride; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2868 uint8_t *cur1 = cur + y*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2869 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2870 for(x=x0; x<x1; x++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2871 #if FRAC_BITS >= LOG2_OBMC_MAX |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2872 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2873 #else |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2874 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2875 #endif |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2876 v = (v + pred1[x]) >> FRAC_BITS; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2877 if(v&(~255)) v= ~(v>>31); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2878 dst1[x] = v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2879 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2880 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2881 |
3206 | 2882 /* copy the regions where obmc[] = (uint8_t)256 */ |
2883 if(LOG2_OBMC_MAX == 8 | |
2884 && (mb_x == 0 || mb_x == b_stride-1) | |
2885 && (mb_y == 0 || mb_y == b_height-1)){ | |
2886 if(mb_x == 0) | |
2887 x1 = block_w; | |
2888 else | |
2889 x0 = block_w; | |
2890 if(mb_y == 0) | |
2891 y1 = block_w; | |
2892 else | |
2893 y0 = block_w; | |
2894 for(y=y0; y<y1; y++) | |
2895 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); | |
2896 } | |
2897 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2898 if(block_w==16){ |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2899 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2900 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2901 /* FIXME cmps overlap but don't cover the wavelet's whole support, |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2902 * so improving the score of one block is not strictly guaranteed to |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2903 * improve the score of the whole frame, so iterative motion est |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2904 * doesn't always converge. */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2905 if(s->avctx->me_cmp == FF_CMP_W97) |
4197 | 2906 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2907 else if(s->avctx->me_cmp == FF_CMP_W53) |
4197 | 2908 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2909 else{ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2910 distortion = 0; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2911 for(i=0; i<4; i++){ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2912 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2913 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2914 } |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2915 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2916 }else{ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2917 assert(block_w==8); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2918 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2919 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2920 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2921 if(plane_index==0){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2922 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2923 /* ..RRr |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2924 * .RXx. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2925 * rxx.. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2926 */ |
3051 | 2927 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2928 } |
3057 | 2929 if(mb_x == b_stride-2) |
2930 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2931 } |
3051 | 2932 return distortion + rate*penalty_factor; |
2933 } | |
2934 | |
2935 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
2936 int i, y2; | |
2937 Plane *p= &s->plane[plane_index]; | |
2938 const int block_size = MB_SIZE >> s->block_max_depth; | |
2939 const int block_w = plane_index ? block_size/2 : block_size; | |
2940 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2941 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2942 const int ref_stride= s->current_picture.linesize[plane_index]; | |
2943 uint8_t *dst= s->current_picture.data[plane_index]; | |
2944 uint8_t *src= s-> input_picture.data[plane_index]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2945 static const IDWTELEM zero_dst[4096]; //FIXME |
3051 | 2946 const int b_stride = s->b_width << s->block_max_depth; |
2947 const int w= p->width; | |
2948 const int h= p->height; | |
2949 int distortion= 0; | |
2950 int rate= 0; | |
2951 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
2952 | |
2953 for(i=0; i<9; i++){ | |
2954 int mb_x2= mb_x + (i%3) - 1; | |
2955 int mb_y2= mb_y + (i/3) - 1; | |
2956 int x= block_w*mb_x2 + block_w/2; | |
2957 int y= block_w*mb_y2 + block_w/2; | |
2958 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2959 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
3051 | 2960 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
2961 | |
2962 //FIXME find a cleaner/simpler way to skip the outside stuff | |
2963 for(y2= y; y2<0; y2++) | |
2964 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2965 for(y2= h; y2<y+block_w; y2++) | |
2966 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2967 if(x<0){ | |
2968 for(y2= y; y2<y+block_w; y2++) | |
2969 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
2970 } | |
2971 if(x+block_w > w){ | |
2972 for(y2= y; y2<y+block_w; y2++) | |
2973 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
2974 } | |
2975 | |
2976 assert(block_w== 8 || block_w==16); | |
2977 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
2978 } | |
2979 | |
2980 if(plane_index==0){ | |
2981 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
2982 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
2983 | |
2984 /* ..RRRr | |
2985 * .RXXx. | |
2986 * .RXXx. | |
2987 * rxxx. | |
2988 */ | |
2989 if(merged) | |
2990 rate = get_block_bits(s, mb_x, mb_y, 2); | |
2991 for(i=merged?4:0; i<9; i++){ | |
2992 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
2993 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
2994 } | |
2995 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2996 return distortion + rate*penalty_factor; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2997 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2998 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2999 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3000 const int b_stride= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3001 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3002 BlockNode backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3003 int rd, index, value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3004 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3005 assert(mb_x>=0 && mb_y>=0); |
2994 | 3006 assert(mb_x<b_stride); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3007 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3008 if(intra){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3009 block->color[0] = p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3010 block->color[1] = p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3011 block->color[2] = p[2]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3012 block->type |= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3013 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3014 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); |
3314 | 3015 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3016 if(s->me_cache[index] == value) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3017 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3018 s->me_cache[index]= value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3019 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3020 block->mx= p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3021 block->my= p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3022 block->type &= ~BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3023 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3024 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3025 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3026 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3027 //FIXME chroma |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3028 if(rd < *best_rd){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3029 *best_rd= rd; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3030 return 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3031 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3032 *block= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3033 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3034 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3035 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3036 |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
3037 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
3038 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
3039 int p[2] = {p0, p1}; |
3197 | 3040 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
3041 } |
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
3042 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
3043 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ |
3051 | 3044 const int b_stride= s->b_width << s->block_max_depth; |
3045 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
3046 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
3047 int rd, index, value; | |
3048 | |
3049 assert(mb_x>=0 && mb_y>=0); | |
3050 assert(mb_x<b_stride); | |
3051 assert(((mb_x|mb_y)&1) == 0); | |
3052 | |
3053 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
3314 | 3054 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); |
3051 | 3055 if(s->me_cache[index] == value) |
3056 return 0; | |
3057 s->me_cache[index]= value; | |
3058 | |
3059 block->mx= p0; | |
3060 block->my= p1; | |
3314 | 3061 block->ref= ref; |
3051 | 3062 block->type &= ~BLOCK_INTRA; |
3063 block[1]= block[b_stride]= block[b_stride+1]= *block; | |
3064 | |
3065 rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3066 | |
3067 //FIXME chroma | |
3068 if(rd < *best_rd){ | |
3069 *best_rd= rd; | |
3070 return 1; | |
3071 }else{ | |
3072 block[0]= backup[0]; | |
3073 block[1]= backup[1]; | |
3074 block[b_stride]= backup[2]; | |
3075 block[b_stride+1]= backup[3]; | |
3076 return 0; | |
3077 } | |
3078 } | |
3079 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3080 static void iterative_me(SnowContext *s){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3081 int pass, mb_x, mb_y; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3082 const int b_width = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3083 const int b_height= s->b_height << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3084 const int b_stride= b_width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3085 int color[3]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3086 |
3194
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3087 { |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3088 RangeCoder r = s->c; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3089 uint8_t state[sizeof(s->block_state)]; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3090 memcpy(state, s->block_state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3091 for(mb_y= 0; mb_y<s->b_height; mb_y++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3092 for(mb_x= 0; mb_x<s->b_width; mb_x++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3093 encode_q_branch(s, 0, mb_x, mb_y); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3094 s->c = r; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3095 memcpy(s->block_state, state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3096 } |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3097 |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
3098 for(pass=0; pass<25; pass++){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3099 int change= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3100 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3101 for(mb_y= 0; mb_y<b_height; mb_y++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3102 for(mb_x= 0; mb_x<b_width; mb_x++){ |
3314 | 3103 int dia_change, i, j, ref; |
3104 int best_rd= INT_MAX, ref_rd; | |
3105 BlockNode backup, ref_b; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3106 const int index= mb_x + mb_y * b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3107 BlockNode *block= &s->block[index]; |
3324 | 3108 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; |
3109 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; | |
3110 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; | |
3111 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; | |
3112 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; | |
3113 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; | |
3114 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; | |
3115 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3116 const int b_w= (MB_SIZE >> s->block_max_depth); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3117 uint8_t obmc_edged[b_w*2][b_w*2]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3118 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3119 if(pass && (block->type & BLOCK_OPT)) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3120 continue; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3121 block->type |= BLOCK_OPT; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3122 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3123 backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3124 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3125 if(!s->me_cache_generation) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3126 memset(s->me_cache, 0, sizeof(s->me_cache)); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3127 s->me_cache_generation += 1<<22; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3128 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3129 //FIXME precalc |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3130 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3131 int x, y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3132 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3133 if(mb_x==0) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3134 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3135 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3136 if(mb_x==b_stride-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3137 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3138 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3139 if(mb_y==0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3140 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3141 obmc_edged[0][x] += obmc_edged[b_w-1][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3142 for(y=1; y<b_w; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3143 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3144 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3145 if(mb_y==b_height-1){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3146 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3147 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3148 for(y=b_w; y<b_w*2-1; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3149 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3150 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3151 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3152 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3153 //skip stuff outside the picture |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3154 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3155 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3156 uint8_t *src= s-> input_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3157 uint8_t *dst= s->current_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3158 const int stride= s->current_picture.linesize[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3159 const int block_w= MB_SIZE >> s->block_max_depth; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3160 const int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3161 const int sy= block_w*mb_y - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3162 const int w= s->plane[0].width; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3163 const int h= s->plane[0].height; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3164 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3165 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3166 for(y=sy; y<0; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3167 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3168 for(y=h; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3169 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3170 if(sx<0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3171 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3172 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3173 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3174 if(sx+block_w*2 > w){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3175 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3176 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3177 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3178 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3179 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3180 // intra(black) = neighbors' contribution to the current block |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3181 for(i=0; i<3; i++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3182 color[i]= get_dc(s, mb_x, mb_y, i); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3183 |
5127 | 3184 // get previous score (cannot be cached due to OBMC) |
3057 | 3185 if(pass > 0 && (block->type&BLOCK_INTRA)){ |
3186 int color0[3]= {block->color[0], block->color[1], block->color[2]}; | |
3187 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); | |
3188 }else | |
3197 | 3189 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); |
3190 | |
3314 | 3191 ref_b= *block; |
3192 ref_rd= best_rd; | |
3193 for(ref=0; ref < s->ref_frames; ref++){ | |
3194 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; | |
3195 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold | |
3196 continue; | |
3197 block->ref= ref; | |
3198 best_rd= INT_MAX; | |
3199 | |
3200 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); | |
3201 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); | |
3324 | 3202 if(tb) |
3314 | 3203 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); |
3324 | 3204 if(lb) |
3314 | 3205 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); |
3324 | 3206 if(rb) |
3314 | 3207 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); |
3324 | 3208 if(bb) |
3314 | 3209 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); |
3210 | |
3211 /* fullpel ME */ | |
3212 //FIXME avoid subpel interpol / round to nearest integer | |
3213 do{ | |
3214 dia_change=0; | |
3215 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3216 for(j=0; j<i; j++){ | |
3217 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3218 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3219 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3220 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3221 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3222 } |
3314 | 3223 }while(dia_change); |
3224 /* subpel ME */ | |
3225 do{ | |
3226 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3227 dia_change=0; | |
3228 for(i=0; i<8; i++) | |
3229 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); | |
3230 }while(dia_change); | |
3231 //FIXME or try the standard 2 pass qpel or similar | |
3232 | |
3233 mvr[0][0]= block->mx; | |
3234 mvr[0][1]= block->my; | |
3235 if(ref_rd > best_rd){ | |
3236 ref_rd= best_rd; | |
3237 ref_b= *block; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3238 } |
3314 | 3239 } |
3240 best_rd= ref_rd; | |
3241 *block= ref_b; | |
2998 | 3242 #if 1 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3243 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3244 //FIXME RD style color selection |
2998 | 3245 #endif |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3246 if(!same_block(block, &backup)){ |
3324 | 3247 if(tb ) tb ->type &= ~BLOCK_OPT; |
3248 if(lb ) lb ->type &= ~BLOCK_OPT; | |
3249 if(rb ) rb ->type &= ~BLOCK_OPT; | |
3250 if(bb ) bb ->type &= ~BLOCK_OPT; | |
3251 if(tlb) tlb->type &= ~BLOCK_OPT; | |
3252 if(trb) trb->type &= ~BLOCK_OPT; | |
3253 if(blb) blb->type &= ~BLOCK_OPT; | |
3254 if(brb) brb->type &= ~BLOCK_OPT; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3255 change ++; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3256 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3257 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3258 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3259 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3260 if(!change) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3261 break; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3262 } |
3051 | 3263 |
3264 if(s->block_max_depth == 1){ | |
3265 int change= 0; | |
3266 for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
3267 for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
3324 | 3268 int i; |
3051 | 3269 int best_rd, init_rd; |
3270 const int index= mb_x + mb_y * b_stride; | |
3271 BlockNode *b[4]; | |
3272 | |
3273 b[0]= &s->block[index]; | |
3274 b[1]= b[0]+1; | |
3275 b[2]= b[0]+b_stride; | |
3276 b[3]= b[2]+1; | |
3277 if(same_block(b[0], b[1]) && | |
3278 same_block(b[0], b[2]) && | |
3279 same_block(b[0], b[3])) | |
3280 continue; | |
3281 | |
3282 if(!s->me_cache_generation) | |
3283 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3284 s->me_cache_generation += 1<<22; | |
3285 | |
3286 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3287 | |
3314 | 3288 //FIXME more multiref search? |
3051 | 3289 check_4block_inter(s, mb_x, mb_y, |
3290 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
3314 | 3291 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); |
3051 | 3292 |
3293 for(i=0; i<4; i++) | |
3294 if(!(b[i]->type&BLOCK_INTRA)) | |
3314 | 3295 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); |
3051 | 3296 |
3297 if(init_rd != best_rd) | |
3298 change++; | |
3299 } | |
3300 } | |
3301 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
3302 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3303 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3304 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3305 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
2138 | 3306 const int level= b->level; |
3307 const int w= b->width; | |
3308 const int h= b->height; | |
4594 | 3309 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
5575 | 3310 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
2150 | 3311 int x,y, thres1, thres2; |
2893 | 3312 // START_TIMER |
2138 | 3313 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3314 if(s->qlog == LOSSLESS_QLOG){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3315 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3316 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3317 dst[x + y*stride]= src[x + y*stride]; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3318 return; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3319 } |
2967 | 3320 |
2138 | 3321 bias= bias ? 0 : (3*qmul)>>3; |
2150 | 3322 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3323 thres2= 2*thres1; | |
2967 | 3324 |
2138 | 3325 if(!bias){ |
3326 for(y=0; y<h; y++){ | |
3327 for(x=0; x<w; x++){ | |
2150 | 3328 int i= src[x + y*stride]; |
2967 | 3329 |
2150 | 3330 if((unsigned)(i+thres1) > thres2){ |
3331 if(i>=0){ | |
3332 i<<= QEXPSHIFT; | |
3333 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3334 dst[x + y*stride]= i; |
2150 | 3335 }else{ |
3336 i= -i; | |
3337 i<<= QEXPSHIFT; | |
3338 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3339 dst[x + y*stride]= -i; |
2150 | 3340 } |
3341 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3342 dst[x + y*stride]= 0; |
2138 | 3343 } |
3344 } | |
3345 }else{ | |
3346 for(y=0; y<h; y++){ | |
3347 for(x=0; x<w; x++){ | |
2967 | 3348 int i= src[x + y*stride]; |
3349 | |
2150 | 3350 if((unsigned)(i+thres1) > thres2){ |
3351 if(i>=0){ | |
3352 i<<= QEXPSHIFT; | |
3353 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3354 dst[x + y*stride]= i; |
2150 | 3355 }else{ |
3356 i= -i; | |
3357 i<<= QEXPSHIFT; | |
3358 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3359 dst[x + y*stride]= -i; |
2150 | 3360 } |
3361 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3362 dst[x + y*stride]= 0; |
2138 | 3363 } |
3364 } | |
3365 } | |
2150 | 3366 if(level+1 == s->spatial_decomposition_count){ |
3367 // STOP_TIMER("quantize") | |
3368 } | |
2138 | 3369 } |
3370 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3371 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3372 const int w= b->width; |
4594 | 3373 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3374 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3375 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3376 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3377 START_TIMER |
2967 | 3378 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3379 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3380 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3381 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3382 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3383 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3384 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3385 int i= line[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3386 if(i<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3387 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3388 }else if(i>0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3389 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3390 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3391 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3392 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3393 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3394 STOP_TIMER("dquant") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3395 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3396 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3397 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3398 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
2138 | 3399 const int w= b->width; |
3400 const int h= b->height; | |
4594 | 3401 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3402 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2138 | 3403 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3404 int x,y; | |
2195 | 3405 START_TIMER |
2967 | 3406 |
2161 | 3407 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3408 |
2138 | 3409 for(y=0; y<h; y++){ |
3410 for(x=0; x<w; x++){ | |
3411 int i= src[x + y*stride]; | |
3412 if(i<0){ | |
3413 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
3414 }else if(i>0){ | |
3415 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
3416 } | |
3417 } | |
3418 } | |
2195 | 3419 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ |
3420 STOP_TIMER("dquant") | |
3421 } | |
2138 | 3422 } |
3423 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3424 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3425 const int w= b->width; |
3426 const int h= b->height; | |
3427 int x,y; | |
2967 | 3428 |
2138 | 3429 for(y=h-1; y>=0; y--){ |
3430 for(x=w-1; x>=0; x--){ | |
3431 int i= x + y*stride; | |
2967 | 3432 |
2138 | 3433 if(x){ |
3434 if(use_median){ | |
3435 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3436 else src[i] -= src[i - 1]; | |
3437 }else{ | |
3438 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3439 else src[i] -= src[i - 1]; | |
3440 } | |
3441 }else{ | |
3442 if(y) src[i] -= src[i - stride]; | |
3443 } | |
3444 } | |
3445 } | |
3446 } | |
3447 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3448 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3449 const int w= b->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3450 int x,y; |
2967 | 3451 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3452 // START_TIMER |
2967 | 3453 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3454 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3455 IDWTELEM * prev; |
2967 | 3456 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3457 if (start_y != 0) |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3458 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2967 | 3459 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3460 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3461 prev = line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3462 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3463 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3464 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3465 if(x){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3466 if(use_median){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3467 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3468 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3469 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3470 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3471 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3472 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3473 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3474 if(y) line[x] += prev[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3475 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3476 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3477 } |
2967 | 3478 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3479 // STOP_TIMER("correlate") |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3480 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3481 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3482 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3483 const int w= b->width; |
3484 const int h= b->height; | |
3485 int x,y; | |
2967 | 3486 |
2138 | 3487 for(y=0; y<h; y++){ |
3488 for(x=0; x<w; x++){ | |
3489 int i= x + y*stride; | |
2967 | 3490 |
2138 | 3491 if(x){ |
3492 if(use_median){ | |
3493 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3494 else src[i] += src[i - 1]; | |
3495 }else{ | |
3496 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3497 else src[i] += src[i - 1]; | |
3498 } | |
3499 }else{ | |
3500 if(y) src[i] += src[i - stride]; | |
3501 } | |
3502 } | |
3503 } | |
3504 } | |
3505 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3506 static void encode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3507 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3508 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3509 for(plane_index=0; plane_index<2; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3510 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3511 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3512 if(orientation==2) continue; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3513 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3514 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3515 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3516 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3517 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3518 |
2138 | 3519 static void encode_header(SnowContext *s){ |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3520 int plane_index, i; |
2967 | 3521 uint8_t kstate[32]; |
3522 | |
3523 memset(kstate, MID_STATE, sizeof(kstate)); | |
2138 | 3524 |
2335 | 3525 put_rac(&s->c, kstate, s->keyframe); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3526 if(s->keyframe || s->always_reset){ |
2199 | 3527 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3528 s->last_spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3529 s->last_qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3530 s->last_qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3531 s->last_mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3532 s->last_block_max_depth= 0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3533 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3534 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3535 p->last_htaps=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3536 p->last_diag_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3537 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3538 } |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3539 } |
2138 | 3540 if(s->keyframe){ |
3541 put_symbol(&s->c, s->header_state, s->version, 0); | |
2335 | 3542 put_rac(&s->c, s->header_state, s->always_reset); |
2138 | 3543 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
3544 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
3545 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
3546 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
3547 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); | |
3548 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
2335 | 3549 put_rac(&s->c, s->header_state, s->spatial_scalability); |
3550 // put_rac(&s->c, s->header_state, s->rate_scalability); | |
3314 | 3551 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); |
2138 | 3552 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3553 encode_qlogs(s); |
2138 | 3554 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3555 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3556 if(!s->keyframe){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3557 int update_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3558 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3559 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3560 update_mc |= p->last_htaps != p->htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3561 update_mc |= p->last_diag_mc != p->diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3562 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3563 } |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3564 put_rac(&s->c, s->header_state, update_mc); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3565 if(update_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3566 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3567 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3568 put_rac(&s->c, s->header_state, p->diag_mc); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3569 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3570 for(i= p->htaps/2; i; i--) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3571 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3572 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3573 p->last_diag_mc= p->diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3574 p->last_htaps= p->htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3575 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3576 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3577 } |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3578 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3579 put_rac(&s->c, s->header_state, 1); |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3580 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3581 encode_qlogs(s); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3582 }else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3583 put_rac(&s->c, s->header_state, 0); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3584 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3585 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3586 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3587 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3588 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3589 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3590 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3591 |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3592 s->last_spatial_decomposition_type= s->spatial_decomposition_type; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3593 s->last_qlog = s->qlog; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3594 s->last_qbias = s->qbias; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3595 s->last_mv_scale = s->mv_scale; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3596 s->last_block_max_depth = s->block_max_depth; |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3597 s->last_spatial_decomposition_count= s->spatial_decomposition_count; |
2138 | 3598 } |
3599 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3600 static void decode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3601 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3602 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3603 for(plane_index=0; plane_index<3; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3604 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3605 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3606 int q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3607 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3608 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3609 else q= get_symbol(&s->c, s->header_state, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3610 s->plane[plane_index].band[level][orientation].qlog= q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3611 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3612 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3613 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3614 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3615 |
2138 | 3616 static int decode_header(SnowContext *s){ |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3617 int plane_index; |
2335 | 3618 uint8_t kstate[32]; |
2138 | 3619 |
2967 | 3620 memset(kstate, MID_STATE, sizeof(kstate)); |
2335 | 3621 |
3622 s->keyframe= get_rac(&s->c, kstate); | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3623 if(s->keyframe || s->always_reset){ |
2199 | 3624 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3625 s->spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3626 s->qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3627 s->qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3628 s->mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3629 s->block_max_depth= 0; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3630 } |
2138 | 3631 if(s->keyframe){ |
3632 s->version= get_symbol(&s->c, s->header_state, 0); | |
3633 if(s->version>0){ | |
3634 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version); | |
3635 return -1; | |
3636 } | |
2335 | 3637 s->always_reset= get_rac(&s->c, s->header_state); |
2138 | 3638 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
3639 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3640 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3641 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); | |
3642 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
3643 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
2335 | 3644 s->spatial_scalability= get_rac(&s->c, s->header_state); |
3645 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
3314 | 3646 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1; |
2138 | 3647 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3648 decode_qlogs(s); |
2138 | 3649 } |
2967 | 3650 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3651 if(!s->keyframe){ |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3652 if(get_rac(&s->c, s->header_state)){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3653 for(plane_index=0; plane_index<2; plane_index++){ |
5807 | 3654 int htaps, i, sum=0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3655 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3656 p->diag_mc= get_rac(&s->c, s->header_state); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3657 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
3658 if((unsigned)htaps > HTAPS_MAX || htaps==0) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3659 return -1; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3660 p->htaps= htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3661 for(i= htaps/2; i; i--){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3662 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3663 sum += p->hcoeff[i]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3664 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3665 p->hcoeff[0]= 32-sum; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3666 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3667 s->plane[2].diag_mc= s->plane[1].diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3668 s->plane[2].htaps = s->plane[1].htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3669 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3670 } |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3671 if(get_rac(&s->c, s->header_state)){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3672 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3673 decode_qlogs(s); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3674 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3675 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3676 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3677 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); |
5588
effa59ca89b3
we only have 2 wavelets, the 3rd was just for experimentation ...
michael
parents:
5587
diff
changeset
|
3678 if(s->spatial_decomposition_type > 1){ |
2138 | 3679 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); |
3680 return -1; | |
3681 } | |
2967 | 3682 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3683 s->qlog += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3684 s->mv_scale += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3685 s->qbias += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3686 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); |
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3206
diff
changeset
|
3687 if(s->block_max_depth > 1 || s->block_max_depth < 0){ |
2952 | 3688 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); |
3689 s->block_max_depth= 0; | |
3690 return -1; | |
3691 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3692 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3693 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3694 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3695 |
3075 | 3696 static void init_qexp(void){ |
2600 | 3697 int i; |
3698 double v=128; | |
3699 | |
3700 for(i=0; i<QROOT; i++){ | |
3701 qexp[i]= lrintf(v); | |
2967 | 3702 v *= pow(2, 1.0 / QROOT); |
2600 | 3703 } |
3704 } | |
3705 | |
2138 | 3706 static int common_init(AVCodecContext *avctx){ |
3707 SnowContext *s = avctx->priv_data; | |
3708 int width, height; | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3709 int i, j; |
2138 | 3710 |
3711 s->avctx= avctx; | |
2967 | 3712 |
2138 | 3713 dsputil_init(&s->dsp, avctx); |
3714 | |
3715 #define mcf(dx,dy)\ | |
3716 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3717 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3718 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3719 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3720 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3721 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
2138 | 3722 |
3723 mcf( 0, 0) | |
3724 mcf( 4, 0) | |
3725 mcf( 8, 0) | |
3726 mcf(12, 0) | |
3727 mcf( 0, 4) | |
3728 mcf( 4, 4) | |
3729 mcf( 8, 4) | |
3730 mcf(12, 4) | |
3731 mcf( 0, 8) | |
3732 mcf( 4, 8) | |
3733 mcf( 8, 8) | |
3734 mcf(12, 8) | |
3735 mcf( 0,12) | |
3736 mcf( 4,12) | |
3737 mcf( 8,12) | |
3738 mcf(12,12) | |
3739 | |
3740 #define mcfh(dx,dy)\ | |
3741 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3742 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3743 mc_block_hpel ## dx ## dy ## 16;\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3744 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3745 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3746 mc_block_hpel ## dx ## dy ## 8; |
2138 | 3747 |
3748 mcfh(0, 0) | |
3749 mcfh(8, 0) | |
3750 mcfh(0, 8) | |
3751 mcfh(8, 8) | |
2600 | 3752 |
3753 if(!qexp[0]) | |
3754 init_qexp(); | |
3755 | |
2138 | 3756 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
2967 | 3757 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3758 width= s->avctx->width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3759 height= s->avctx->height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3760 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3761 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3762 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here |
2967 | 3763 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3764 for(i=0; i<MAX_REF_FRAMES; i++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3765 for(j=0; j<MAX_REF_FRAMES; j++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3766 scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3767 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3768 s->avctx->get_buffer(s->avctx, &s->mconly_picture); |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3769 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3770 return 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3771 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3772 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3773 static int common_init_after_header(AVCodecContext *avctx){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3774 SnowContext *s = avctx->priv_data; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3775 int plane_index, level, orientation; |
2967 | 3776 |
3777 for(plane_index=0; plane_index<3; plane_index++){ | |
2138 | 3778 int w= s->avctx->width; |
3779 int h= s->avctx->height; | |
3780 | |
3781 if(plane_index){ | |
3782 w>>= s->chroma_h_shift; | |
3783 h>>= s->chroma_v_shift; | |
3784 } | |
3785 s->plane[plane_index].width = w; | |
3786 s->plane[plane_index].height= h; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3787 |
2160 | 3788 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h); |
2138 | 3789 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
3790 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3791 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 3792 |
2138 | 3793 b->buf= s->spatial_dwt_buffer; |
3794 b->level= level; | |
3795 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3796 b->width = (w + !(orientation&1))>>1; | |
3797 b->height= (h + !(orientation>1))>>1; | |
2967 | 3798 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3799 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3800 b->buf_x_offset = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3801 b->buf_y_offset = 0; |
2967 | 3802 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3803 if(orientation&1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3804 b->buf += (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3805 b->buf_x_offset = (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3806 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3807 if(orientation>1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3808 b->buf += b->stride>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3809 b->buf_y_offset = b->stride_line >> 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3810 } |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3811 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
2967 | 3812 |
2138 | 3813 if(level) |
3814 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3815 //FIXME avoid this realloc |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3816 av_freep(&b->x_coeff); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3817 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
2138 | 3818 } |
3819 w= (w+1)>>1; | |
3820 h= (h+1)>>1; | |
3821 } | |
3822 } | |
2967 | 3823 |
2138 | 3824 return 0; |
3825 } | |
3826 | |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3827 static int qscale2qlog(int qscale){ |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3828 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3829 + 61*QROOT/8; //<64 >60 |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3830 } |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3831 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3832 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) |
3313 | 3833 { |
3834 /* estimate the frame's complexity as a sum of weighted dwt coefs. | |
3835 * FIXME we know exact mv bits at this point, | |
3836 * but ratecontrol isn't set up to include them. */ | |
3837 uint32_t coef_sum= 0; | |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3838 int level, orientation, delta_qlog; |
3313 | 3839 |
3840 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3841 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3842 SubBand *b= &s->plane[0].band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3843 IDWTELEM *buf= b->ibuf; |
3313 | 3844 const int w= b->width; |
3845 const int h= b->height; | |
3846 const int stride= b->stride; | |
4594 | 3847 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
3313 | 3848 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3849 const int qdiv= (1<<16)/qmul; | |
3850 int x, y; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3851 //FIXME this is ugly |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3852 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3853 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3854 buf[x+y*stride]= b->buf[x+y*stride]; |
3313 | 3855 if(orientation==0) |
3856 decorrelate(s, b, buf, stride, 1, 0); | |
3857 for(y=0; y<h; y++) | |
3858 for(x=0; x<w; x++) | |
3859 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | |
3860 } | |
3861 } | |
3862 | |
3863 /* ugly, ratecontrol just takes a sqrt again */ | |
3864 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | |
3865 assert(coef_sum < INT_MAX); | |
3866 | |
3867 if(pict->pict_type == I_TYPE){ | |
3868 s->m.current_picture.mb_var_sum= coef_sum; | |
3869 s->m.current_picture.mc_mb_var_sum= 0; | |
3870 }else{ | |
3871 s->m.current_picture.mc_mb_var_sum= coef_sum; | |
3872 s->m.current_picture.mb_var_sum= 0; | |
3873 } | |
3874 | |
3875 pict->quality= ff_rate_estimate_qscale(&s->m, 1); | |
3766 | 3876 if (pict->quality < 0) |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
3877 return INT_MIN; |
3313 | 3878 s->lambda= pict->quality * 3/2; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3879 delta_qlog= qscale2qlog(pict->quality) - s->qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3880 s->qlog+= delta_qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3881 return delta_qlog; |
3313 | 3882 } |
2138 | 3883 |
5909 | 3884 static void calculate_visual_weight(SnowContext *s, Plane *p){ |
2138 | 3885 int width = p->width; |
3886 int height= p->height; | |
2198 | 3887 int level, orientation, x, y; |
2138 | 3888 |
3889 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3890 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3891 SubBand *b= &p->band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3892 IDWTELEM *ibuf= b->ibuf; |
2138 | 3893 int64_t error=0; |
2967 | 3894 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3895 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3896 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3897 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2138 | 3898 for(y=0; y<height; y++){ |
3899 for(x=0; x<width; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3900 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
2138 | 3901 error += d*d; |
3902 } | |
3903 } | |
3904 | |
3905 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
2164 | 3906 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/); |
2138 | 3907 } |
3908 } | |
3909 } | |
3910 | |
5702 | 3911 #define QUANTIZE2 0 |
3912 | |
3913 #if QUANTIZE2==1 | |
3914 #define Q2_STEP 8 | |
3915 | |
3916 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
3917 SubBand *b= &p->band[level][orientation]; | |
3918 int x, y; | |
3919 int xo=0; | |
3920 int yo=0; | |
3921 int step= 1 << (s->spatial_decomposition_count - level); | |
3922 | |
3923 if(orientation&1) | |
3924 xo= step>>1; | |
3925 if(orientation&2) | |
3926 yo= step>>1; | |
3927 | |
3928 //FIXME bias for non zero ? | |
3929 //FIXME optimize | |
3930 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
3931 for(y=0; y<p->height; y++){ | |
3932 for(x=0; x<p->width; x++){ | |
3933 int sx= (x-xo + step/2) / step / Q2_STEP; | |
3934 int sy= (y-yo + step/2) / step / Q2_STEP; | |
3935 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
3936 assert(sx>=0 && sy>=0 && sx < score_stride); | |
3937 v= ((v+8)>>4)<<4; | |
3938 score[sx + sy*score_stride] += v*v; | |
3939 assert(score[sx + sy*score_stride] >= 0); | |
3940 } | |
3941 } | |
3942 } | |
3943 | |
3944 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
3945 int level, orientation; | |
3946 | |
3947 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3948 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3949 SubBand *b= &p->band[level][orientation]; | |
3950 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3951 | |
3952 dequantize(s, b, dst, b->stride); | |
3953 } | |
3954 } | |
3955 } | |
3956 | |
3957 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
3958 int level, orientation, ys, xs, x, y, pass; | |
3959 IDWTELEM best_dequant[height * stride]; | |
3960 IDWTELEM idwt2_buffer[height * stride]; | |
3961 const int score_stride= (width + 10)/Q2_STEP; | |
3962 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3963 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3964 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
3965 | |
3966 //FIXME pass the copy cleanly ? | |
3967 | |
3968 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
3969 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3970 | |
3971 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3972 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3973 SubBand *b= &p->band[level][orientation]; | |
3974 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3975 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
3976 assert(src == b->buf); // code doesnt depen on this but its true currently | |
3977 | |
3978 quantize(s, b, dst, src, b->stride, s->qbias); | |
3979 } | |
3980 } | |
3981 for(pass=0; pass<1; pass++){ | |
3982 if(s->qbias == 0) //keyframe | |
3983 continue; | |
3984 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3985 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3986 SubBand *b= &p->band[level][orientation]; | |
3987 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3988 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3989 | |
3990 for(ys= 0; ys<Q2_STEP; ys++){ | |
3991 for(xs= 0; xs<Q2_STEP; xs++){ | |
3992 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3993 dequantize_all(s, p, idwt2_buffer, width, height); | |
3994 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3995 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3996 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3997 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3998 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3999 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
4000 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
4001 //FIXME try more then just -- | |
4002 } | |
4003 } | |
4004 dequantize_all(s, p, idwt2_buffer, width, height); | |
4005 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
4006 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
4007 for(y=ys; y<b->height; y+= Q2_STEP){ | |
4008 for(x=xs; x<b->width; x+= Q2_STEP){ | |
4009 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
4010 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
4011 best_score[score_idx]= score[score_idx]; | |
4012 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
4013 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
4014 //FIXME copy instead | |
4015 } | |
4016 } | |
4017 } | |
4018 } | |
4019 } | |
4020 } | |
4021 } | |
4022 } | |
4023 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly insteda of copy at the end | |
4024 } | |
4025 | |
5910 | 4026 #endif /* QUANTIZE2==1 */ |
5702 | 4027 |
2138 | 4028 static int encode_init(AVCodecContext *avctx) |
4029 { | |
4030 SnowContext *s = avctx->priv_data; | |
2198 | 4031 int plane_index; |
2138 | 4032 |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
4033 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ |
2628
511e3afc43e1
Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped")
melanson
parents:
2610
diff
changeset
|
4034 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n" |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
4035 "use vstrict=-2 / -strict -2 to use it anyway\n"); |
2151 | 4036 return -1; |
4037 } | |
2967 | 4038 |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
4039 if(avctx->prediction_method == DWT_97 |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
4040 && (avctx->flags & CODEC_FLAG_QSCALE) |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
4041 && avctx->global_quality == 0){ |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
4042 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n"); |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
4043 return -1; |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
4044 } |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
4045 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4046 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4047 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4048 s->chroma_h_shift= 1; //FIXME XXX |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4049 s->chroma_v_shift= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4050 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4051 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4052 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4053 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4054 for(plane_index=0; plane_index<3; plane_index++){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4055 s->plane[plane_index].diag_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4056 s->plane[plane_index].htaps= 6; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4057 s->plane[plane_index].hcoeff[0]= 40; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4058 s->plane[plane_index].hcoeff[1]= -10; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4059 s->plane[plane_index].hcoeff[2]= 2; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4060 s->plane[plane_index].fast_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4061 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4062 |
2138 | 4063 common_init(avctx); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4064 alloc_blocks(s); |
2967 | 4065 |
2138 | 4066 s->version=0; |
2967 | 4067 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4068 s->m.avctx = avctx; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4069 s->m.flags = avctx->flags; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4070 s->m.bit_rate= avctx->bit_rate; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4071 |
2138 | 4072 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
4073 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
4074 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4075 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); |
2138 | 4076 h263_encode_init(&s->m); //mv_penalty |
4077 | |
3314 | 4078 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); |
4079 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4080 if(avctx->flags&CODEC_FLAG_PASS1){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4081 if(!avctx->stats_out) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4082 avctx->stats_out = av_mallocz(256); |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4083 } |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4084 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4085 if(ff_rate_control_init(&s->m) < 0) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4086 return -1; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4087 } |
3313 | 4088 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4089 |
2138 | 4090 avctx->coded_frame= &s->current_picture; |
4091 switch(avctx->pix_fmt){ | |
4092 // case PIX_FMT_YUV444P: | |
4093 // case PIX_FMT_YUV422P: | |
4094 case PIX_FMT_YUV420P: | |
4095 case PIX_FMT_GRAY8: | |
4096 // case PIX_FMT_YUV411P: | |
4097 // case PIX_FMT_YUV410P: | |
4098 s->colorspace_type= 0; | |
4099 break; | |
4494
ce643a22f049
Replace deprecated PIX_FMT names by the newer variants.
diego
parents:
4436
diff
changeset
|
4100 /* case PIX_FMT_RGB32: |
2138 | 4101 s->colorspace= 1; |
4102 break;*/ | |
4103 default: | |
5908 | 4104 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); |
2138 | 4105 return -1; |
4106 } | |
4107 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
4108 s->chroma_h_shift= 1; | |
4109 s->chroma_v_shift= 1; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4110 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4111 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4112 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4113 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4114 s->avctx->get_buffer(s->avctx, &s->input_picture); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4115 |
3314 | 4116 if(s->avctx->me_method == ME_ITER){ |
4117 int i; | |
4118 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
4119 for(i=0; i<s->max_ref_frames; i++){ | |
4120 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
4121 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
4122 } | |
4123 } | |
4124 | |
2138 | 4125 return 0; |
4126 } | |
4127 | |
5924 | 4128 #define USE_HALFPEL_PLANE 0 |
4129 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4130 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4131 int p,x,y; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4132 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4133 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4134 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4135 for(p=0; p<3; p++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4136 int is_chroma= !!p; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4137 int w= s->avctx->width >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4138 int h= s->avctx->height >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4139 int ls= frame->linesize[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4140 uint8_t *src= frame->data[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4141 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4142 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4143 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4144 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4145 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4146 halfpel[0][p]= src; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4147 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4148 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4149 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4150 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4151 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4152 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4153 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4154 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4155 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4156 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4157 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4158 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4159 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4160 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4161 src= halfpel[1][p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4162 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4163 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4164 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4165 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4166 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4167 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4168 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4169 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4170 //FIXME border! |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4171 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4172 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4173 |
2138 | 4174 static int frame_start(SnowContext *s){ |
4175 AVFrame tmp; | |
2187 | 4176 int w= s->avctx->width; //FIXME round up to x16 ? |
4177 int h= s->avctx->height; | |
2138 | 4178 |
2187 | 4179 if(s->current_picture.data[0]){ |
4180 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); | |
4181 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
4182 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
4183 } | |
4184 | |
3314 | 4185 tmp= s->last_picture[s->max_ref_frames-1]; |
4186 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4187 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); |
5924 | 4188 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4189 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); |
3314 | 4190 s->last_picture[0]= s->current_picture; |
2138 | 4191 s->current_picture= tmp; |
2967 | 4192 |
3314 | 4193 if(s->keyframe){ |
4194 s->ref_frames= 0; | |
4195 }else{ | |
4196 int i; | |
4197 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
4198 if(i && s->last_picture[i-1].key_frame) | |
4199 break; | |
4200 s->ref_frames= i; | |
4201 } | |
4202 | |
2138 | 4203 s->current_picture.reference= 1; |
4204 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
4205 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
4206 return -1; | |
4207 } | |
2967 | 4208 |
3314 | 4209 s->current_picture.key_frame= s->keyframe; |
4210 | |
2138 | 4211 return 0; |
4212 } | |
4213 | |
4214 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
4215 SnowContext *s = avctx->priv_data; | |
2335 | 4216 RangeCoder * const c= &s->c; |
2138 | 4217 AVFrame *pict = data; |
4218 const int width= s->avctx->width; | |
4219 const int height= s->avctx->height; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4220 int level, orientation, plane_index, i, y; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4221 uint8_t rc_header_bak[sizeof(s->header_state)]; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4222 uint8_t rc_block_bak[sizeof(s->block_state)]; |
2138 | 4223 |
2335 | 4224 ff_init_range_encoder(c, buf, buf_size); |
4225 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2967 | 4226 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4227 for(i=0; i<3; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4228 int shift= !!i; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4229 for(y=0; y<(height>>shift); y++) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4230 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4231 &pict->data[i][y * pict->linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4232 width>>shift); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4233 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4234 s->new_picture = *pict; |
2138 | 4235 |
3313 | 4236 s->m.picture_number= avctx->frame_number; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4237 if(avctx->flags&CODEC_FLAG_PASS2){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4238 s->m.pict_type = |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4239 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4240 s->keyframe= pict->pict_type==FF_I_TYPE; |
3766 | 4241 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { |
3193 | 4242 pict->quality= ff_rate_estimate_qscale(&s->m, 0); |
3766 | 4243 if (pict->quality < 0) |
4244 return -1; | |
4245 } | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4246 }else{ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4247 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
3313 | 4248 s->m.pict_type= |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4249 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4250 } |
2967 | 4251 |
3313 | 4252 if(s->pass1_rc && avctx->frame_number == 0) |
4253 pict->quality= 2*FF_QP2LAMBDA; | |
2161 | 4254 if(pict->quality){ |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4255 s->qlog= qscale2qlog(pict->quality); |
3313 | 4256 s->lambda = pict->quality * 3/2; |
4257 } | |
4258 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ | |
2161 | 4259 s->qlog= LOSSLESS_QLOG; |
3313 | 4260 s->lambda = 0; |
4261 }//else keep previous frame's qlog until after motion est | |
2138 | 4262 |
4263 frame_start(s); | |
4264 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4265 s->m.current_picture_ptr= &s->m.current_picture; |
2138 | 4266 if(pict->pict_type == P_TYPE){ |
4267 int block_width = (width +15)>>4; | |
4268 int block_height= (height+15)>>4; | |
4269 int stride= s->current_picture.linesize[0]; | |
2967 | 4270 |
2138 | 4271 assert(s->current_picture.data[0]); |
3314 | 4272 assert(s->last_picture[0].data[0]); |
2967 | 4273 |
2138 | 4274 s->m.avctx= s->avctx; |
4275 s->m.current_picture.data[0]= s->current_picture.data[0]; | |
3314 | 4276 s->m. last_picture.data[0]= s->last_picture[0].data[0]; |
2138 | 4277 s->m. new_picture.data[0]= s-> input_picture.data[0]; |
4278 s->m. last_picture_ptr= &s->m. last_picture; | |
4279 s->m.linesize= | |
4280 s->m. last_picture.linesize[0]= | |
4281 s->m. new_picture.linesize[0]= | |
4282 s->m.current_picture.linesize[0]= stride; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4283 s->m.uvlinesize= s->current_picture.linesize[1]; |
2138 | 4284 s->m.width = width; |
4285 s->m.height= height; | |
4286 s->m.mb_width = block_width; | |
4287 s->m.mb_height= block_height; | |
4288 s->m.mb_stride= s->m.mb_width+1; | |
4289 s->m.b8_stride= 2*s->m.mb_width+1; | |
4290 s->m.f_code=1; | |
4291 s->m.pict_type= pict->pict_type; | |
4292 s->m.me_method= s->avctx->me_method; | |
4293 s->m.me.scene_change_score=0; | |
4294 s->m.flags= s->avctx->flags; | |
4295 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
4296 s->m.out_format= FMT_H263; | |
4297 s->m.unrestricted_mv= 1; | |
4298 | |
3313 | 4299 s->m.lambda = s->lambda; |
2138 | 4300 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4301 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4302 |
2138 | 4303 s->m.dsp= s->dsp; //move |
4304 ff_init_me(&s->m); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4305 s->dsp= s->m.dsp; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4306 } |
2967 | 4307 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4308 if(s->pass1_rc){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4309 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4310 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4311 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4312 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4313 redo_frame: |
2967 | 4314 |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4315 if(pict->pict_type == I_TYPE) |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4316 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4317 else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4318 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4319 |
3313 | 4320 s->m.pict_type = pict->pict_type; |
2138 | 4321 s->qbias= pict->pict_type == P_TYPE ? 2 : 0; |
4322 | |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4323 common_init_after_header(avctx); |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4324 |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4325 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4326 for(plane_index=0; plane_index<3; plane_index++){ |
5909 | 4327 calculate_visual_weight(s, &s->plane[plane_index]); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4328 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4329 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4330 |
2138 | 4331 encode_header(s); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4332 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4333 encode_blocks(s, 1); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4334 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; |
2967 | 4335 |
2138 | 4336 for(plane_index=0; plane_index<3; plane_index++){ |
4337 Plane *p= &s->plane[plane_index]; | |
4338 int w= p->width; | |
4339 int h= p->height; | |
4340 int x, y; | |
2198 | 4341 // int bits= put_bits_count(&s->c.pb); |
2138 | 4342 |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4343 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
2138 | 4344 //FIXME optimize |
4345 if(pict->data[plane_index]) //FIXME gray hack | |
4346 for(y=0; y<h; y++){ | |
4347 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4348 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; |
2138 | 4349 } |
4350 } | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4351 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); |
2967 | 4352 |
4353 if( plane_index==0 | |
4354 && pict->pict_type == P_TYPE | |
3313 | 4355 && !(avctx->flags&CODEC_FLAG_PASS2) |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4356 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ |
2335 | 4357 ff_init_range_encoder(c, buf, buf_size); |
4358 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4359 pict->pict_type= FF_I_TYPE; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4360 s->keyframe=1; |
3314 | 4361 s->current_picture.key_frame=1; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4362 goto redo_frame; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4363 } |
2967 | 4364 |
2161 | 4365 if(s->qlog == LOSSLESS_QLOG){ |
4366 for(y=0; y<h; y++){ | |
4367 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4368 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; |
2161 | 4369 } |
4370 } | |
5575 | 4371 }else{ |
4372 for(y=0; y<h; y++){ | |
4373 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4374 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; |
5575 | 4375 } |
4376 } | |
2161 | 4377 } |
2967 | 4378 |
5702 | 4379 if(QUANTIZE2) |
4380 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); | |
4381 else | |
4382 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
2161 | 4383 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4384 if(s->pass1_rc && plane_index==0){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4385 int delta_qlog = ratecontrol_1pass(s, pict); |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
4386 if (delta_qlog <= INT_MIN) |
3766 | 4387 return -1; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4388 if(delta_qlog){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4389 //reordering qlog in the bitstream would eliminate this reset |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4390 ff_init_range_encoder(c, buf, buf_size); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4391 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4392 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4393 encode_header(s); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4394 encode_blocks(s, 0); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4395 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4396 } |
3313 | 4397 |
2138 | 4398 for(level=0; level<s->spatial_decomposition_count; level++){ |
4399 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4400 SubBand *b= &p->band[level][orientation]; | |
2967 | 4401 |
5702 | 4402 if(!QUANTIZE2) |
4403 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); | |
2138 | 4404 if(orientation==0) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4405 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4406 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); |
2138 | 4407 assert(b->parent==NULL || b->parent->stride == b->stride*2); |
4408 if(orientation==0) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4409 correlate(s, b, b->ibuf, b->stride, 1, 0); |
2138 | 4410 } |
4411 } | |
4412 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits); | |
4413 | |
4414 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4415 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4416 SubBand *b= &p->band[level][orientation]; | |
4417 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4418 dequantize(s, b, b->ibuf, b->stride); |
2138 | 4419 } |
4420 } | |
2161 | 4421 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4422 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2161 | 4423 if(s->qlog == LOSSLESS_QLOG){ |
4424 for(y=0; y<h; y++){ | |
4425 for(x=0; x<w; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4426 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; |
2161 | 4427 } |
4428 } | |
4429 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
4430 {START_TIMER |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4431 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
4432 STOP_TIMER("pred-conv")} |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4433 }else{ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4434 //ME/MC only |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4435 if(pict->pict_type == I_TYPE){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4436 for(y=0; y<h; y++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4437 for(x=0; x<w; x++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4438 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4439 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4440 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4441 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4442 }else{ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4443 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4444 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4445 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4446 } |
2138 | 4447 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
4448 int64_t error= 0; | |
2967 | 4449 |
2138 | 4450 if(pict->data[plane_index]) //FIXME gray hack |
4451 for(y=0; y<h; y++){ | |
4452 for(x=0; x<w; x++){ | |
2161 | 4453 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
2138 | 4454 error += d*d; |
4455 } | |
4456 } | |
4457 s->avctx->error[plane_index] += error; | |
2232 | 4458 s->current_picture.error[plane_index] = error; |
2138 | 4459 } |
4460 } | |
4461 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4462 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
3314 | 4463 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4464 for(i=0; i<9; i++) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4465 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4466 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4467 } |
2138 | 4468 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4469 s->current_picture.coded_picture_number = avctx->frame_number; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4470 s->current_picture.pict_type = pict->pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4471 s->current_picture.quality = pict->quality; |
3313 | 4472 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
4473 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; | |
4474 s->m.current_picture.display_picture_number = | |
4475 s->m.current_picture.coded_picture_number = avctx->frame_number; | |
4476 s->m.current_picture.quality = pict->quality; | |
4477 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); | |
4478 if(s->pass1_rc) | |
3766 | 4479 if (ff_rate_estimate_qscale(&s->m, 0) < 0) |
4480 return -1; | |
3313 | 4481 if(avctx->flags&CODEC_FLAG_PASS1) |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4482 ff_write_pass1_stats(&s->m); |
3313 | 4483 s->m.last_pict_type = s->m.pict_type; |
4123
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4484 avctx->frame_bits = s->m.frame_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4485 avctx->mv_bits = s->m.mv_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4486 avctx->misc_bits = s->m.misc_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4487 avctx->p_tex_bits = s->m.p_tex_bits; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4488 |
2138 | 4489 emms_c(); |
2967 | 4490 |
2335 | 4491 return ff_rac_terminate(c); |
2138 | 4492 } |
4493 | |
4494 static void common_end(SnowContext *s){ | |
3314 | 4495 int plane_index, level, orientation, i; |
2192 | 4496 |
2138 | 4497 av_freep(&s->spatial_dwt_buffer); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4498 av_freep(&s->spatial_idwt_buffer); |
2138 | 4499 |
2967 | 4500 av_freep(&s->m.me.scratchpad); |
2138 | 4501 av_freep(&s->m.me.map); |
4502 av_freep(&s->m.me.score_map); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4503 av_freep(&s->m.obmc_scratchpad); |
2967 | 4504 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4505 av_freep(&s->block); |
2192 | 4506 |
3314 | 4507 for(i=0; i<MAX_REF_FRAMES; i++){ |
4508 av_freep(&s->ref_mvs[i]); | |
4509 av_freep(&s->ref_scores[i]); | |
4510 if(s->last_picture[i].data[0]) | |
4511 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
4512 } | |
4513 | |
2967 | 4514 for(plane_index=0; plane_index<3; plane_index++){ |
2192 | 4515 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
4516 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4517 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 4518 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4519 av_freep(&b->x_coeff); |
2192 | 4520 } |
4521 } | |
4522 } | |
2138 | 4523 } |
4524 | |
4525 static int encode_end(AVCodecContext *avctx) | |
4526 { | |
4527 SnowContext *s = avctx->priv_data; | |
4528 | |
4529 common_end(s); | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4530 av_free(avctx->stats_out); |
2138 | 4531 |
4532 return 0; | |
4533 } | |
4534 | |
4535 static int decode_init(AVCodecContext *avctx) | |
4536 { | |
2635 | 4537 avctx->pix_fmt= PIX_FMT_YUV420P; |
2138 | 4538 |
4539 common_init(avctx); | |
2967 | 4540 |
2138 | 4541 return 0; |
4542 } | |
4543 | |
4544 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ | |
4545 SnowContext *s = avctx->priv_data; | |
2335 | 4546 RangeCoder * const c= &s->c; |
2138 | 4547 int bytes_read; |
4548 AVFrame *picture = data; | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4549 int level, orientation, plane_index, i; |
2138 | 4550 |
2335 | 4551 ff_init_range_decoder(c, buf, buf_size); |
4552 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2138 | 4553 |
4554 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
5668 | 4555 if(decode_header(s)<0) |
4556 return -1; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4557 common_init_after_header(avctx); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4558 |
5662
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4559 // realloc slice buffer for the case that spatial_decomposition_count changed |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4560 slice_buffer_destroy(&s->sb); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4561 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4562 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4563 for(plane_index=0; plane_index<3; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4564 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4565 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4566 && p->hcoeff[1]==-10 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4567 && p->hcoeff[2]==2; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4568 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4569 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4570 if(!s->block) alloc_blocks(s); |
2138 | 4571 |
4572 frame_start(s); | |
4573 //keyframe flag dupliaction mess FIXME | |
4574 if(avctx->debug&FF_DEBUG_PICT_INFO) | |
4575 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
2967 | 4576 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4577 decode_blocks(s); |
2138 | 4578 |
4579 for(plane_index=0; plane_index<3; plane_index++){ | |
4580 Plane *p= &s->plane[plane_index]; | |
4581 int w= p->width; | |
4582 int h= p->height; | |
4583 int x, y; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4584 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
2967 | 4585 |
2138 | 4586 if(s->avctx->debug&2048){ |
4587 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4588 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
2138 | 4589 |
4590 for(y=0; y<h; y++){ | |
4591 for(x=0; x<w; x++){ | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
4592 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; |
2138 | 4593 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; |
4594 } | |
4595 } | |
4596 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4597 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4598 { START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4599 for(level=0; level<s->spatial_decomposition_count; level++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4600 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4601 SubBand *b= &p->band[level][orientation]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4602 unpack_coeffs(s, b, b->parent, orientation); |
2138 | 4603 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4604 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4605 STOP_TIMER("unpack coeffs"); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4606 } |
2138 | 4607 |
2562 | 4608 {START_TIMER |
4609 const int mb_h= s->b_height << s->block_max_depth; | |
4610 const int block_size = MB_SIZE >> s->block_max_depth; | |
4611 const int block_w = plane_index ? block_size/2 : block_size; | |
4612 int mb_y; | |
4613 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
4614 int yd=0, yq=0; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4615 int y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4616 int end_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4617 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4618 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2562 | 4619 for(mb_y=0; mb_y<=mb_h; mb_y++){ |
2967 | 4620 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4621 int slice_starty = block_w*mb_y; |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4622 int slice_h = block_w*(mb_y+1); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4623 if (!(s->keyframe || s->avctx->debug&512)){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4624 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4625 slice_h -= (block_w >> 1); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4626 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4627 |
2967 | 4628 { |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4629 START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4630 for(level=0; level<s->spatial_decomposition_count; level++){ |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4631 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4632 SubBand *b= &p->band[level][orientation]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4633 int start_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4634 int end_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4635 int our_mb_start = mb_y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4636 int our_mb_end = (mb_y + 1); |
3012 | 4637 const int extra= 3; |
4638 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
4639 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4640 if (!(s->keyframe || s->avctx->debug&512)){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4641 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4642 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4643 } |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4644 start_y = FFMIN(b->height, start_y); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4645 end_y = FFMIN(b->height, end_y); |
2967 | 4646 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4647 if (start_y != end_y){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4648 if (orientation == 0){ |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4649 SubBand * correlate_band = &p->band[0][0]; |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4650 int correlate_end_y = FFMIN(b->height, end_y + 1); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4651 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4652 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4653 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4654 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4655 } |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4656 else |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4657 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4658 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4659 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4660 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4661 STOP_TIMER("decode_subband_slice"); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4662 } |
2967 | 4663 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4664 { START_TIMER |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4665 for(; yd<slice_h; yd+=4){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
4666 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4667 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4668 STOP_TIMER("idwt slice");} |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4669 |
2967 | 4670 |
2161 | 4671 if(s->qlog == LOSSLESS_QLOG){ |
2562 | 4672 for(; yq<slice_h && yq<h; yq++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4673 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); |
2161 | 4674 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4675 line[x] <<= FRAC_BITS; |
2161 | 4676 } |
4677 } | |
4678 } | |
2562 | 4679 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4680 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); |
2967 | 4681 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4682 y = FFMIN(p->height, slice_starty); |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4683 end_y = FFMIN(p->height, slice_h); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4684 while(y < end_y) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4685 slice_buffer_release(&s->sb, y++); |
2562 | 4686 } |
2967 | 4687 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4688 slice_buffer_flush(&s->sb); |
2967 | 4689 |
2562 | 4690 STOP_TIMER("idwt + predict_slices")} |
2138 | 4691 } |
2967 | 4692 |
2138 | 4693 emms_c(); |
4694 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4695 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
3314 | 4696 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4697 for(i=0; i<9; i++) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4698 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4699 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4700 } |
2138 | 4701 |
2967 | 4702 if(!(s->avctx->debug&2048)) |
2138 | 4703 *picture= s->current_picture; |
4704 else | |
4705 *picture= s->mconly_picture; | |
2967 | 4706 |
2138 | 4707 *data_size = sizeof(AVFrame); |
2967 | 4708 |
2335 | 4709 bytes_read= c->bytestream - c->bytestream_start; |
4710 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
2138 | 4711 |
4712 return bytes_read; | |
4713 } | |
4714 | |
4715 static int decode_end(AVCodecContext *avctx) | |
4716 { | |
4717 SnowContext *s = avctx->priv_data; | |
4718 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4719 slice_buffer_destroy(&s->sb); |
2967 | 4720 |
2138 | 4721 common_end(s); |
4722 | |
4723 return 0; | |
4724 } | |
4725 | |
4726 AVCodec snow_decoder = { | |
4727 "snow", | |
4728 CODEC_TYPE_VIDEO, | |
4729 CODEC_ID_SNOW, | |
4730 sizeof(SnowContext), | |
4731 decode_init, | |
4732 NULL, | |
4733 decode_end, | |
4734 decode_frame, | |
4735 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
4736 NULL | |
4737 }; | |
4738 | |
#ifdef CONFIG_SNOW_ENCODER
/*
 * Codec descriptor for the Snow encoder, built only when
 * CONFIG_SNOW_ENCODER is defined. Positional initializer; members after
 * the close callback are left to default initialisation.
 */
AVCodec snow_encoder = {
    "snow",                 /* codec name */
    CODEC_TYPE_VIDEO,
    CODEC_ID_SNOW,
    sizeof(SnowContext),    /* size of the private context */
    encode_init,
    encode_frame,
    encode_end,
};
#endif
2138 | 4750 |
4751 | |
#if 0
#undef malloc
#undef free
#undef printf
#undef random

/*
 * Disabled self-test.
 *  1. 5/3 DWT: forward + inverse transform must reproduce the input exactly.
 *  2. 9/7 DWT: forward + inverse transform may deviate by at most 20.
 *  3. (nested #if 0) range coder put_symbol/get_symbol round trip.
 *  4. Prints a visual_weight table computed from the energy of a single
 *     coefficient pushed through the inverse transform, followed by a dump
 *     of the centre of one more transformed test pattern.
 */
int main(){
    int width=256;
    int height=256;
    int buffer[2][width*height];
    SnowContext s;
    int i;

    s.spatial_decomposition_count=6;
    s.spatial_decomposition_type=1;

    printf("testing 5/3 DWT\n");
    for(i=0; i<width*height; i++){
        buffer[1][i]= random()%54321 - 12345;
        buffer[0][i]= buffer[1][i];
    }

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++){
        if(buffer[0][i]!= buffer[1][i])
            printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
    }

    printf("testing 9/7 DWT\n");
    s.spatial_decomposition_type=0;
    for(i=0; i<width*height; i++){
        buffer[1][i]= random()%54321 - 12345;
        buffer[0][i]= buffer[1][i];
    }

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++){
        if(FFABS(buffer[0][i] - buffer[1][i])>20)
            printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
    }

#if 0
    printf("testing AC coder\n");
    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
START_TIMER
        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
STOP_TIMER("put_symbol")
    }
    ff_rac_terminate(&s.c);

    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
        int j;
START_TIMER
        j= get_symbol(&s.c, s.header_state, 1);
STOP_TIMER("get_symbol")
        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
    }
#endif
    {
        int level, orientation, x, y;
        int64_t errors[8][4];
        int64_t g=0;    /* running gcd of all measured errors */

        memset(errors, 0, sizeof(errors));
        s.spatial_decomposition_count=3;
        s.spatial_decomposition_type=0;
        for(level=0; level<s.spatial_decomposition_count; level++){
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
                const int shift= s.spatial_decomposition_count-level;
                int w= width >> shift;
                int h= height >> shift;
                int stride= width << shift;
                DWTELEM *buf= buffer[0];
                int64_t error=0;

                /* move to the start of the selected subband */
                if(orientation&1) buf+=w;
                if(orientation>1) buf+=stride>>1;

                /* one coefficient in the middle of the band, all else zero */
                memset(buffer[0], 0, sizeof(int)*width*height);
                buf[w/2 + h/2*stride]= 256*256;
                ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
                for(y=0; y<height; y++){
                    const int near_row= FFABS(height/2-y)<9 && level==2;
                    for(x=0; x<width; x++){
                        int64_t d= buffer[0][x + y*width];
                        error += d*d;
                        if(FFABS(width/2-x)<9 && near_row) printf("%8"PRId64" ", d);
                    }
                    if(near_row) printf("\n");
                }
                error= (int)(sqrt(error)+0.5);
                errors[level][orientation]= error;
                g= g ? ff_gcd(g, error) : error;
            }
        }
        printf("static int const visual_weight[][4]={\n");
        for(level=0; level<s.spatial_decomposition_count; level++){
            printf(" {");
            for(orientation=0; orientation<4; orientation++){
                printf("%8"PRId64",", errors[level][orientation]/g);
            }
            printf("},\n");
        }
        printf("};\n");
        {
            int level=2;
            int orientation=3;
            int w= width >> (s.spatial_decomposition_count-level);
            int h= height >> (s.spatial_decomposition_count-level);
            int stride= width << (s.spatial_decomposition_count-level);
            DWTELEM *buf= buffer[0];
            int64_t error=0;

            buf+=w;
            buf+=stride>>1;

            memset(buffer[0], 0, sizeof(int)*width*height);
#if 1
            /* 2x2 repeating pattern, then forward transform */
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int tab[4]={0,2,3,1};
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
                }
            }
            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#else
            for(y=0; y<h; y++){
                for(x=0; x<w; x++){
                    buf[x + y*stride ]=169;
                    buf[x + y*stride-w]=64;
                }
            }
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#endif
            for(y=0; y<height; y++){
                const int near_row= FFABS(height/2-y)<9;
                for(x=0; x<width; x++){
                    int64_t d= buffer[0][x + y*width];
                    error += d*d;
                    if(FFABS(width/2-x)<9 && near_row) printf("%8"PRId64" ", d);
                }
                if(near_row) printf("\n");
            }
        }

    }
    return 0;
}
#endif /* 0 */