Mercurial > libavcodec.hg
annotate snow.c @ 9501:880d9562d218 libavcodec
Avoid code duplication in xan_unpack for the final memcpy.
author | reimar |
---|---|
date | Sat, 18 Apr 2009 17:26:00 +0000 |
parents | fc15a3b977bf |
children | 8178935a95d0 |
rev | line source |
---|---|
2138 | 1 /* |
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2138 | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2138 | 10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2138 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3035
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2138 | 19 */ |
20 | |
21 #include "avcodec.h" | |
22 #include "dsputil.h" | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
23 #include "snow.h" |
2335 | 24 |
25 #include "rangecoder.h" | |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8611
diff
changeset
|
26 #include "mathops.h" |
2138 | 27 |
28 #include "mpegvideo.h" | |
29 | |
30 #undef NDEBUG | |
31 #include <assert.h> | |
32 | |
33 static const int8_t quant3[256]={ | |
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
50 }; | |
51 static const int8_t quant3b[256]={ | |
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
68 }; | |
2596 | 69 static const int8_t quant3bA[256]={ |
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
86 }; | |
2138 | 87 static const int8_t quant5[256]={ |
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
104 }; | |
105 static const int8_t quant7[256]={ | |
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
122 }; | |
123 static const int8_t quant9[256]={ | |
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
140 }; | |
141 static const int8_t quant11[256]={ | |
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
158 }; | |
159 static const int8_t quant13[256]={ | |
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
176 }; | |
177 | |
178 #if 0 //64*cubic | |
179 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 212 //error:0.000022 |
213 }; | |
214 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
2138 | 231 //error:0.000033 |
232 }; | |
233 #elif 1 // 64*linear | |
234 static const uint8_t obmc32[1024]={ | |
3206 | 235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, |
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
2138 | 267 //error:0.000020 |
268 }; | |
269 static const uint8_t obmc16[256]={ | |
3206 | 270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, |
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, | |
2138 | 286 //error:0.000015 |
287 }; | |
288 #else //64*cos | |
289 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 322 //error:0.000022 |
323 }; | |
324 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
2138 | 341 //error:0.000022 |
342 }; | |
5910 | 343 #endif /* 0 */ |
2138 | 344 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
345 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
346 static const uint8_t obmc8[64]={ |
3206 | 347 4, 12, 20, 28, 28, 20, 12, 4, |
348 12, 36, 60, 84, 84, 60, 36, 12, | |
349 20, 60,100,140,140,100, 60, 20, | |
350 28, 84,140,196,196,140, 84, 28, | |
351 28, 84,140,196,196,140, 84, 28, | |
352 20, 60,100,140,140,100, 60, 20, | |
353 12, 36, 60, 84, 84, 60, 36, 12, | |
354 4, 12, 20, 28, 28, 20, 12, 4, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
355 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
356 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
357 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
358 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
359 static const uint8_t obmc4[16]={ |
3206 | 360 16, 48, 48, 16, |
361 48,144,144, 48, | |
362 48,144,144, 48, | |
363 16, 48, 48, 16, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
364 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
365 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
366 |
7129 | 367 static const uint8_t * const obmc_tab[4]={ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
368 obmc32, obmc16, obmc8, obmc4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
369 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
370 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
372 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
373 typedef struct BlockNode{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
374 int16_t mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
375 int16_t my; |
3314 | 376 uint8_t ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
377 uint8_t color[3]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
378 uint8_t type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
379 //#define TYPE_SPLIT 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
380 #define BLOCK_INTRA 1 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
381 #define BLOCK_OPT 2 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
382 //#define TYPE_NOCOLOR 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
383 uint8_t level; //FIXME merge into type? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
384 }BlockNode; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
385 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
386 static const BlockNode null_block= { //FIXME add border maybe |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
387 .color= {128,128,128}, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
388 .mx= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
389 .my= 0, |
3314 | 390 .ref= 0, |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
391 .type= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
392 .level= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
393 }; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
394 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
395 #define LOG2_MB_SIZE 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
396 #define MB_SIZE (1<<LOG2_MB_SIZE) |
5575 | 397 #define ENCODER_EXTRA_BITS 4 |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
398 #define HTAPS_MAX 8 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
399 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
400 typedef struct x_and_coeff{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
401 int16_t x; |
2596 | 402 uint16_t coeff; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
403 } x_and_coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
404 |
2138 | 405 typedef struct SubBand{ |
406 int level; | |
407 int stride; | |
408 int width; | |
409 int height; | |
6412 | 410 int qlog; ///< log(qscale)/log[2^(1/6)] |
2138 | 411 DWTELEM *buf; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
412 IDWTELEM *ibuf; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
413 int buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
414 int buf_y_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
415 int stride_line; ///< Stride measured in lines, not pixels. |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
416 x_and_coeff * x_coeff; |
2138 | 417 struct SubBand *parent; |
418 uint8_t state[/*7*2*/ 7 + 512][32]; | |
419 }SubBand; | |
420 | |
421 typedef struct Plane{ | |
422 int width; | |
423 int height; | |
424 SubBand band[MAX_DECOMPOSITIONS][4]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
425 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
426 int htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
427 int8_t hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
428 int diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
429 int fast_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
430 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
431 int last_htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
432 int8_t last_hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
433 int last_diag_mc; |
2138 | 434 }Plane; |
435 | |
436 typedef struct SnowContext{ | |
6412 | 437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 438 |
439 AVCodecContext *avctx; | |
2335 | 440 RangeCoder c; |
2138 | 441 DSPContext dsp; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
442 AVFrame new_picture; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
443 AVFrame input_picture; ///< new_picture with the internal linesizes |
2138 | 444 AVFrame current_picture; |
3314 | 445 AVFrame last_picture[MAX_REF_FRAMES]; |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; |
2138 | 447 AVFrame mconly_picture; |
448 // uint8_t q_context[16]; | |
449 uint8_t header_state[32]; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
450 uint8_t block_state[128 + 32*128]; |
2138 | 451 int keyframe; |
2199 | 452 int always_reset; |
2138 | 453 int version; |
454 int spatial_decomposition_type; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
455 int last_spatial_decomposition_type; |
2138 | 456 int temporal_decomposition_type; |
457 int spatial_decomposition_count; | |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
458 int last_spatial_decomposition_count; |
2138 | 459 int temporal_decomposition_count; |
3314 | 460 int max_ref_frames; |
461 int ref_frames; | |
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | |
463 uint32_t *ref_scores[MAX_REF_FRAMES]; | |
2138 | 464 DWTELEM *spatial_dwt_buffer; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
465 IDWTELEM *spatial_idwt_buffer; |
2138 | 466 int colorspace_type; |
467 int chroma_h_shift; | |
468 int chroma_v_shift; | |
469 int spatial_scalability; | |
470 int qlog; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
471 int last_qlog; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
472 int lambda; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
473 int lambda2; |
3313 | 474 int pass1_rc; |
2138 | 475 int mv_scale; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
476 int last_mv_scale; |
2138 | 477 int qbias; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
478 int last_qbias; |
2138 | 479 #define QBIAS_SHIFT 3 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
480 int b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
481 int b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
482 int block_max_depth; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
483 int last_block_max_depth; |
2138 | 484 Plane plane[MAX_PLANES]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
485 BlockNode *block; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
486 #define ME_CACHE_SIZE 1024 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
487 int me_cache[ME_CACHE_SIZE]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
488 int me_cache_generation; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
489 slice_buffer sb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
490 |
6412 | 491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
492 |
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
493 uint8_t *scratchbuf; |
2138 | 494 }SnowContext; |
495 | |
2562 | 496 typedef struct { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
497 IDWTELEM *b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
498 IDWTELEM *b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
499 IDWTELEM *b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
500 IDWTELEM *b3; |
2562 | 501 int y; |
8308 | 502 } DWTCompose; |
2562 | 503 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
506 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
507 static void iterative_me(SnowContext *s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
508 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
510 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
511 int i; |
2967 | 512 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
513 buf->base_buffer = base_buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
514 buf->line_count = line_count; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
515 buf->line_width = line_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
516 buf->data_count = max_allocated_lines; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
2967 | 519 |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
520 for(i = 0; i < max_allocated_lines; i++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
522 } |
2967 | 523 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
524 buf->data_stack_top = max_allocated_lines - 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
525 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
526 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
528 { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
529 IDWTELEM * buffer; |
2967 | 530 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
531 assert(buf->data_stack_top >= 0); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
532 // assert(!buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
533 if (buf->line[line]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
534 return buf->line[line]; |
2967 | 535 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
536 buffer = buf->data_stack[buf->data_stack_top]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
537 buf->data_stack_top--; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
538 buf->line[line] = buffer; |
2967 | 539 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
540 return buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
541 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
542 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
543 static void slice_buffer_release(slice_buffer * buf, int line) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
544 { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
545 IDWTELEM * buffer; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
546 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
547 assert(line >= 0 && line < buf->line_count); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
548 assert(buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
549 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
550 buffer = buf->line[line]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
551 buf->data_stack_top++; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
552 buf->data_stack[buf->data_stack_top] = buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
553 buf->line[line] = NULL; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
554 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
555 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
556 static void slice_buffer_flush(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
557 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
558 int i; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
559 for(i = 0; i < buf->line_count; i++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
560 if (buf->line[i]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
561 slice_buffer_release(buf, i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
562 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
563 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
564 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
565 static void slice_buffer_destroy(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
566 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
567 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
568 slice_buffer_flush(buf); |
2967 | 569 |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
570 for(i = buf->data_count - 1; i >= 0; i--){ |
3190 | 571 av_freep(&buf->data_stack[i]); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
572 } |
3190 | 573 av_freep(&buf->data_stack); |
574 av_freep(&buf->line); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
575 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
576 |
2979 | 577 #ifdef __sgi |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
578 // Avoid a name clash on SGI IRIX |
2979 | 579 #undef qexp |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
580 #endif |
2246 | 581 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
2600 | 582 static uint8_t qexp[QROOT]; |
2138 | 583 |
584 static inline int mirror(int v, int m){ | |
2998 | 585 while((unsigned)v > (unsigned)m){ |
586 v=-v; | |
587 if(v<0) v+= 2*m; | |
588 } | |
589 return v; | |
2138 | 590 } |
591 | |
2335 | 592 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
2138 | 593 int i; |
594 | |
595 if(v){ | |
4001 | 596 const int a= FFABS(v); |
2138 | 597 const int e= av_log2(a); |
598 #if 1 | |
2967 | 599 const int el= FFMIN(e, 10); |
2335 | 600 put_rac(c, state+0, 0); |
2138 | 601 |
602 for(i=0; i<el; i++){ | |
2335 | 603 put_rac(c, state+1+i, 1); //1..10 |
2138 | 604 } |
605 for(; i<e; i++){ | |
2335 | 606 put_rac(c, state+1+9, 1); //1..10 |
2138 | 607 } |
2335 | 608 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 609 |
610 for(i=e-1; i>=el; i--){ | |
2335 | 611 put_rac(c, state+22+9, (a>>i)&1); //22..31 |
2138 | 612 } |
613 for(; i>=0; i--){ | |
2335 | 614 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 615 } |
616 | |
617 if(is_signed) | |
2335 | 618 put_rac(c, state+11 + el, v < 0); //11..21 |
2138 | 619 #else |
2967 | 620 |
2335 | 621 put_rac(c, state+0, 0); |
2138 | 622 if(e<=9){ |
623 for(i=0; i<e; i++){ | |
2335 | 624 put_rac(c, state+1+i, 1); //1..10 |
2138 | 625 } |
2335 | 626 put_rac(c, state+1+i, 0); |
2138 | 627 |
628 for(i=e-1; i>=0; i--){ | |
2335 | 629 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 630 } |
631 | |
632 if(is_signed) | |
2335 | 633 put_rac(c, state+11 + e, v < 0); //11..21 |
2138 | 634 }else{ |
635 for(i=0; i<e; i++){ | |
2335 | 636 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
2138 | 637 } |
2335 | 638 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 639 |
640 for(i=e-1; i>=0; i--){ | |
2335 | 641 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
2138 | 642 } |
643 | |
644 if(is_signed) | |
2335 | 645 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
2138 | 646 } |
5910 | 647 #endif /* 1 */ |
2138 | 648 }else{ |
2335 | 649 put_rac(c, state+0, 1); |
2138 | 650 } |
651 } | |
652 | |
2335 | 653 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
654 if(get_rac(c, state+0)) | |
2138 | 655 return 0; |
656 else{ | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
657 int i, e, a; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
658 e= 0; |
2335 | 659 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
660 e++; |
2138 | 661 } |
662 | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
663 a= 1; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
664 for(i=e-1; i>=0; i--){ |
2335 | 665 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
666 } |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
667 |
2335 | 668 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
2138 | 669 return -a; |
670 else | |
671 return a; | |
672 } | |
673 } | |
674 | |
2335 | 675 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
676 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
677 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
678 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
679 assert(v>=0); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
680 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
681 |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
682 while(v >= r){ |
2335 | 683 put_rac(c, state+4+log2, 1); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
684 v -= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
685 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
686 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
687 } |
2335 | 688 put_rac(c, state+4+log2, 0); |
2967 | 689 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
690 for(i=log2-1; i>=0; i--){ |
2335 | 691 put_rac(c, state+31-i, (v>>i)&1); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
692 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
693 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
694 |
2335 | 695 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
696 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
697 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
698 int v=0; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
699 |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
700 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
701 |
2335 | 702 while(get_rac(c, state+4+log2)){ |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
703 v+= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
704 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
705 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
706 } |
2967 | 707 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
708 for(i=log2-1; i>=0; i--){ |
2335 | 709 v+= get_rac(c, state+31-i)<<i; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
710 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
711 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
712 return v; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
713 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
714 |
5627 | 715 static av_always_inline void |
716 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
717 int dst_step, int src_step, int ref_step, | |
718 int width, int mul, int add, int shift, | |
719 int highpass, int inverse){ | |
2138 | 720 const int mirror_left= !highpass; |
721 const int mirror_right= (width&1) ^ highpass; | |
722 const int w= (width>>1) - 1 + (highpass & width); | |
723 int i; | |
724 | |
725 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
726 if(mirror_left){ | |
727 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
728 dst += dst_step; | |
729 src += src_step; | |
730 } | |
2967 | 731 |
2138 | 732 for(i=0; i<w; i++){ |
5627 | 733 dst[i*dst_step] = |
734 LIFT(src[i*src_step], | |
735 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
736 inverse); | |
2138 | 737 } |
2967 | 738 |
2138 | 739 if(mirror_right){ |
5627 | 740 dst[w*dst_step] = |
741 LIFT(src[w*src_step], | |
742 ((mul*2*ref[w*ref_step]+add)>>shift), | |
743 inverse); | |
2138 | 744 } |
745 } | |
746 | |
5627 | 747 static av_always_inline void |
748 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
749 int dst_step, int src_step, int ref_step, | |
750 int width, int mul, int add, int shift, | |
751 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
752 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
753 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
754 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
755 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
756 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
757 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
758 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
759 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
760 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
761 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
762 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
763 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
764 for(i=0; i<w; i++){ |
5627 | 765 dst[i*dst_step] = |
766 LIFT(src[i*src_step], | |
767 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
768 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
769 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
770 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
771 if(mirror_right){ |
5627 | 772 dst[w*dst_step] = |
773 LIFT(src[w*src_step], | |
774 ((mul*2*ref[w*ref_step]+add)>>shift), | |
775 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
776 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
777 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
778 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
779 #ifndef liftS |
5627 | 780 static av_always_inline void |
781 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
782 int dst_step, int src_step, int ref_step, | |
783 int width, int mul, int add, int shift, | |
784 int highpass, int inverse){ | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
785 const int mirror_left= !highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
786 const int mirror_right= (width&1) ^ highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
787 const int w= (width>>1) - 1 + (highpass & width); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
788 int i; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
789 |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
790 assert(shift == 4); |
5627 | 791 #define LIFTS(src, ref, inv) \ |
792 ((inv) ? \ | |
793 (src) + (((ref) + 4*(src))>>shift): \ | |
794 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
795 if(mirror_left){ |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
796 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
797 dst += dst_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
798 src += src_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
799 } |
2967 | 800 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
801 for(i=0; i<w; i++){ |
5627 | 802 dst[i*dst_step] = |
803 LIFTS(src[i*src_step], | |
804 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
805 inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
806 } |
2967 | 807 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
808 if(mirror_right){ |
5627 | 809 dst[w*dst_step] = |
810 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
811 } |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
812 } |
5627 | 813 static av_always_inline void |
814 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
815 int dst_step, int src_step, int ref_step, | |
816 int width, int mul, int add, int shift, | |
817 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
818 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
819 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
820 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
821 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
822 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
823 assert(shift == 4); |
5627 | 824 #define LIFTS(src, ref, inv) \ |
825 ((inv) ? \ | |
826 (src) + (((ref) + 4*(src))>>shift): \ | |
827 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
828 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
829 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
830 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
831 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
832 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
833 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
834 for(i=0; i<w; i++){ |
5627 | 835 dst[i*dst_step] = |
836 LIFTS(src[i*src_step], | |
837 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
838 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
839 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
840 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
841 if(mirror_right){ |
5627 | 842 dst[w*dst_step] = |
843 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
844 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
845 } |
6413 | 846 #endif /* ! liftS */ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
847 |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
848 static void horizontal_decompose53i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
849 DWTELEM temp[width]; |
2138 | 850 const int width2= width>>1; |
2893 | 851 int x; |
2138 | 852 const int w2= (width+1)>>1; |
853 | |
854 for(x=0; x<width2; x++){ | |
855 temp[x ]= b[2*x ]; | |
856 temp[x+w2]= b[2*x + 1]; | |
857 } | |
858 if(width&1) | |
859 temp[x ]= b[2*x ]; | |
860 #if 0 | |
2893 | 861 { |
862 int A1,A2,A3,A4; | |
2138 | 863 A2= temp[1 ]; |
864 A4= temp[0 ]; | |
865 A1= temp[0+width2]; | |
866 A1 -= (A2 + A4)>>1; | |
867 A4 += (A1 + 1)>>1; | |
868 b[0+width2] = A1; | |
869 b[0 ] = A4; | |
870 for(x=1; x+1<width2; x+=2){ | |
871 A3= temp[x+width2]; | |
872 A4= temp[x+1 ]; | |
873 A3 -= (A2 + A4)>>1; | |
874 A2 += (A1 + A3 + 2)>>2; | |
875 b[x+width2] = A3; | |
876 b[x ] = A2; | |
877 | |
878 A1= temp[x+1+width2]; | |
879 A2= temp[x+2 ]; | |
880 A1 -= (A2 + A4)>>1; | |
881 A4 += (A1 + A3 + 2)>>2; | |
882 b[x+1+width2] = A1; | |
883 b[x+1 ] = A4; | |
884 } | |
885 A3= temp[width-1]; | |
886 A3 -= A2; | |
887 A2 += (A1 + A3 + 2)>>2; | |
888 b[width -1] = A3; | |
889 b[width2-1] = A2; | |
2893 | 890 } |
2967 | 891 #else |
2138 | 892 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
893 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
6413 | 894 #endif /* 0 */ |
2138 | 895 } |
896 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
897 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 898 int i; |
2967 | 899 |
2138 | 900 for(i=0; i<width; i++){ |
901 b1[i] -= (b0[i] + b2[i])>>1; | |
902 } | |
903 } | |
904 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
905 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 906 int i; |
2967 | 907 |
2138 | 908 for(i=0; i<width; i++){ |
909 b1[i] += (b0[i] + b2[i] + 2)>>2; | |
910 } | |
911 } | |
912 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
913 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 914 int y; |
2138 | 915 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
916 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
2967 | 917 |
2138 | 918 for(y=-2; y<height; y+=2){ |
919 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
920 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
921 | |
2998 | 922 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
923 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
6405 | 924 |
2998 | 925 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
926 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
2967 | 927 |
2138 | 928 b0=b2; |
929 b1=b3; | |
930 } | |
931 } | |
932 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
933 static void horizontal_decompose97i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
934 DWTELEM temp[width]; |
2138 | 935 const int w2= (width+1)>>1; |
936 | |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
937 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
938 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
939 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
2138 | 940 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
941 } | |
942 | |
943 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
944 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 945 int i; |
2967 | 946 |
2138 | 947 for(i=0; i<width; i++){ |
948 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
949 } | |
950 } | |
951 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
952 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 953 int i; |
2967 | 954 |
2138 | 955 for(i=0; i<width; i++){ |
956 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
957 } | |
958 } | |
959 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
960 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 961 int i; |
2967 | 962 |
2138 | 963 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
964 #ifdef liftS |
2138 | 965 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
966 #else |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
967 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
968 #endif |
2138 | 969 } |
970 } | |
971 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
972 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 973 int i; |
2967 | 974 |
2138 | 975 for(i=0; i<width; i++){ |
976 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
977 } | |
978 } | |
979 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
980 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 981 int y; |
2138 | 982 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
983 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
984 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
985 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
2967 | 986 |
2138 | 987 for(y=-4; y<height; y+=2){ |
988 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
989 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
990 | |
2998 | 991 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
992 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
6405 | 993 |
2998 | 994 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
995 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
996 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
997 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
2138 | 998 |
999 b0=b2; | |
1000 b1=b3; | |
1001 b2=b4; | |
1002 b3=b5; | |
1003 } | |
1004 } | |
1005 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
1006 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2138 | 1007 int level; |
2967 | 1008 |
2164 | 1009 for(level=0; level<decomposition_count; level++){ |
1010 switch(type){ | |
3326 | 1011 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1012 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
2138 | 1013 } |
1014 } | |
1015 } | |
1016 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1017 static void horizontal_compose53i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1018 IDWTELEM temp[width]; |
2138 | 1019 const int width2= width>>1; |
1020 const int w2= (width+1)>>1; | |
2893 | 1021 int x; |
2138 | 1022 |
1023 #if 0 | |
2893 | 1024 int A1,A2,A3,A4; |
2138 | 1025 A2= temp[1 ]; |
1026 A4= temp[0 ]; | |
1027 A1= temp[0+width2]; | |
1028 A1 -= (A2 + A4)>>1; | |
1029 A4 += (A1 + 1)>>1; | |
1030 b[0+width2] = A1; | |
1031 b[0 ] = A4; | |
1032 for(x=1; x+1<width2; x+=2){ | |
1033 A3= temp[x+width2]; | |
1034 A4= temp[x+1 ]; | |
1035 A3 -= (A2 + A4)>>1; | |
1036 A2 += (A1 + A3 + 2)>>2; | |
1037 b[x+width2] = A3; | |
1038 b[x ] = A2; | |
1039 | |
1040 A1= temp[x+1+width2]; | |
1041 A2= temp[x+2 ]; | |
1042 A1 -= (A2 + A4)>>1; | |
1043 A4 += (A1 + A3 + 2)>>2; | |
1044 b[x+1+width2] = A1; | |
1045 b[x+1 ] = A4; | |
1046 } | |
1047 A3= temp[width-1]; | |
1048 A3 -= A2; | |
1049 A2 += (A1 + A3 + 2)>>2; | |
1050 b[width -1] = A3; | |
1051 b[width2-1] = A2; | |
2967 | 1052 #else |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1053 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1054 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
5910 | 1055 #endif /* 0 */ |
2138 | 1056 for(x=0; x<width2; x++){ |
1057 b[2*x ]= temp[x ]; | |
1058 b[2*x + 1]= temp[x+w2]; | |
1059 } | |
1060 if(width&1) | |
1061 b[2*x ]= temp[x ]; | |
1062 } | |
1063 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1064 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1065 int i; |
2967 | 1066 |
2138 | 1067 for(i=0; i<width; i++){ |
1068 b1[i] += (b0[i] + b2[i])>>1; | |
1069 } | |
1070 } | |
1071 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1072 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1073 int i; |
2967 | 1074 |
2138 | 1075 for(i=0; i<width; i++){ |
1076 b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1077 } | |
1078 } | |
1079 | |
8308 | 1080 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1081 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1082 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1083 cs->y = -1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1084 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1085 |
8308 | 1086 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1087 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1088 cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1089 cs->y = -1; | |
1090 } | |
1091 | |
8308 | 1092 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1093 int y= cs->y; |
2967 | 1094 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1095 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1096 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1097 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1098 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1099 |
2998 | 1100 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1101 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1102 |
2998 | 1103 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1104 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1105 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1106 cs->b0 = b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1107 cs->b1 = b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1108 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1109 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1110 |
8308 | 1111 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1112 int y= cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1113 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1114 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1115 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1116 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
2138 | 1117 |
2998 | 1118 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1119 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1120 |
2998 | 1121 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1122 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2138 | 1123 |
2562 | 1124 cs->b0 = b2; |
1125 cs->b1 = b3; | |
1126 cs->y += 2; | |
1127 } | |
1128 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1129 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ |
8308 | 1130 DWTCompose cs; |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1131 spatial_compose53i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1132 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1133 spatial_compose53i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1134 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1135 |
2967 | 1136 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1137 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1138 IDWTELEM temp[width]; |
2138 | 1139 const int w2= (width+1)>>1; |
1140 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1141 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
1142 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1143 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1144 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
2138 | 1145 } |
1146 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1147 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1148 int i; |
2967 | 1149 |
2138 | 1150 for(i=0; i<width; i++){ |
1151 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1152 } | |
1153 } | |
1154 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1155 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1156 int i; |
2967 | 1157 |
2138 | 1158 for(i=0; i<width; i++){ |
1159 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1160 } | |
1161 } | |
1162 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1163 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1164 int i; |
2967 | 1165 |
2138 | 1166 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1167 #ifdef liftS |
2138 | 1168 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1169 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1170 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1171 #endif |
2138 | 1172 } |
1173 } | |
1174 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1175 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1176 int i; |
2967 | 1177 |
2138 | 1178 for(i=0; i<width; i++){ |
1179 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1180 } | |
1181 } | |
1182 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1183 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
2592 | 1184 int i; |
2967 | 1185 |
2592 | 1186 for(i=0; i<width; i++){ |
1187 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
1188 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1189 #ifdef liftS |
2592 | 1190 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1191 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1192 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1193 #endif |
2592 | 1194 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1195 } | |
1196 } | |
1197 | |
8308 | 1198 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1199 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1200 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1201 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1202 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1203 cs->y = -3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1204 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1205 |
8308 | 1206 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1207 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1208 cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1209 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1210 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1211 cs->y = -3; | |
1212 } | |
2138 | 1213 |
8308 | 1214 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1215 int y = cs->y; |
2967 | 1216 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1217 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1218 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1219 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1220 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1221 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1222 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
2967 | 1223 |
2592 | 1224 if(y>0 && y+4<height){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1225 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
2592 | 1226 }else{ |
2998 | 1227 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1228 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1229 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1230 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2592 | 1231 } |
6405 | 1232 |
9495 | 1233 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
1234 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1235 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1236 cs->b0=b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1237 cs->b1=b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1238 cs->b2=b4; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1239 cs->b3=b5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1240 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1241 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1242 |
8308 | 1243 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1244 int y = cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1245 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1246 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1247 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1248 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1249 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1250 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
2138 | 1251 |
9495 | 1252 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1253 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1254 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1255 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
1256 | |
1257 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); | |
1258 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); | |
2562 | 1259 |
1260 cs->b0=b2; | |
1261 cs->b1=b3; | |
1262 cs->b2=b4; | |
1263 cs->b3=b5; | |
1264 cs->y += 2; | |
1265 } | |
1266 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1267 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ |
8308 | 1268 DWTCompose cs; |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1269 spatial_compose97i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1270 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1271 spatial_compose97i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1272 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1273 |
8308 | 1274 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1275 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1276 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1277 switch(type){ |
3326 | 1278 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
1279 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1280 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1281 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1282 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1283 |
8308 | 1284 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1285 int level; |
1286 for(level=decomposition_count-1; level>=0; level--){ | |
1287 switch(type){ | |
3326 | 1288 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1289 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
2562 | 1290 } |
1291 } | |
1292 } | |
1293 | |
8308 | 1294 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
2562 | 1295 const int support = type==1 ? 3 : 5; |
1296 int level; | |
1297 if(type==2) return; | |
1298 | |
1299 for(level=decomposition_count-1; level>=0; level--){ | |
1300 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1301 switch(type){ | |
3326 | 1302 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1303 break; |
3326 | 1304 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1305 break; |
2562 | 1306 } |
1307 } | |
2138 | 1308 } |
1309 } | |
1310 | |
8308 | 1311 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1312 const int support = type==1 ? 3 : 5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1313 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1314 if(type==2) return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1315 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1316 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1317 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1318 switch(type){ |
3326 | 1319 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1320 break; |
3326 | 1321 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1322 break; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1323 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1324 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1325 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1326 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1327 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1328 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
8308 | 1329 DWTCompose cs[MAX_DECOMPOSITIONS]; |
2562 | 1330 int y; |
1331 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1332 for(y=0; y<height; y+=4) | |
1333 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
2138 | 1334 } |
1335 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1336 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1337 const int w= b->width; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1338 const int h= b->height; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1339 int x, y; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1340 |
2138 | 1341 if(1){ |
1342 int run=0; | |
2149 | 1343 int runs[w*h]; |
2138 | 1344 int run_index=0; |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1345 int max_index; |
2967 | 1346 |
2138 | 1347 for(y=0; y<h; y++){ |
1348 for(x=0; x<w; x++){ | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1349 int v, p=0; |
2144 | 1350 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1351 v= src[x + y*stride]; |
2138 | 1352 |
1353 if(y){ | |
2149 | 1354 t= src[x + (y-1)*stride]; |
2138 | 1355 if(x){ |
2149 | 1356 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1357 } |
1358 if(x + 1 < w){ | |
2149 | 1359 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1360 } |
1361 } | |
1362 if(x){ | |
2149 | 1363 l= src[x - 1 + y*stride]; |
2144 | 1364 /*if(x > 1){ |
1365 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1366 else ll= src[x - 2 + y*stride]; | |
2138 | 1367 }*/ |
1368 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1369 if(parent){ |
2149 | 1370 int px= x>>1; |
1371 int py= y>>1; | |
2967 | 1372 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1373 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1374 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1375 if(!(/*ll|*/l|lt|t|rt|p)){ |
2138 | 1376 if(v){ |
1377 runs[run_index++]= run; | |
1378 run=0; | |
1379 }else{ | |
1380 run++; | |
1381 } | |
1382 } | |
1383 } | |
1384 } | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1385 max_index= run_index; |
2138 | 1386 runs[run_index++]= run; |
1387 run_index=0; | |
1388 run= runs[run_index++]; | |
1389 | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1390 put_symbol2(&s->c, b->state[30], max_index, 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1391 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1392 put_symbol2(&s->c, b->state[1], run, 3); |
2967 | 1393 |
2138 | 1394 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
1395 if(s->c.bytestream_end - s->c.bytestream < w*40){ |
2422 | 1396 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1397 return -1; | |
1398 } | |
2138 | 1399 for(x=0; x<w; x++){ |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1400 int v, p=0; |
2144 | 1401 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1402 v= src[x + y*stride]; |
2138 | 1403 |
1404 if(y){ | |
2149 | 1405 t= src[x + (y-1)*stride]; |
2138 | 1406 if(x){ |
2149 | 1407 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1408 } |
1409 if(x + 1 < w){ | |
2149 | 1410 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1411 } |
1412 } | |
1413 if(x){ | |
2149 | 1414 l= src[x - 1 + y*stride]; |
2144 | 1415 /*if(x > 1){ |
1416 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1417 else ll= src[x - 2 + y*stride]; | |
2138 | 1418 }*/ |
1419 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1420 if(parent){ |
2149 | 1421 int px= x>>1; |
1422 int py= y>>1; | |
2967 | 1423 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1424 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1425 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1426 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1427 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
2144 | 1428 |
2335 | 1429 put_rac(&s->c, &b->state[0][context], !!v); |
2138 | 1430 }else{ |
1431 if(!run){ | |
1432 run= runs[run_index++]; | |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1433 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1434 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1435 put_symbol2(&s->c, b->state[1], run, 3); |
2138 | 1436 assert(v); |
1437 }else{ | |
1438 run--; | |
1439 assert(!v); | |
1440 } | |
1441 } | |
1442 if(v){ | |
4001 | 1443 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
1444 int l2= 2*FFABS(l) + (l<0); | |
1445 int t2= 2*FFABS(t) + (t<0); | |
1446 | |
1447 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
2596 | 1448 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
2138 | 1449 } |
1450 } | |
1451 } | |
1452 } | |
2422 | 1453 return 0; |
2138 | 1454 } |
1455 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1456 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1457 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1458 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
2422 | 1459 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1460 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1461 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1462 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1463 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
2138 | 1464 const int w= b->width; |
1465 const int h= b->height; | |
1466 int x,y; | |
2967 | 1467 |
2138 | 1468 if(1){ |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1469 int run, runs; |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1470 x_and_coeff *xc= b->x_coeff; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1471 x_and_coeff *prev_xc= NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1472 x_and_coeff *prev2_xc= xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1473 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1474 x_and_coeff *prev_parent_xc= parent_xc; |
2138 | 1475 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1476 runs= get_symbol2(&s->c, b->state[30], 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1477 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1478 else run= INT_MAX; |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1479 |
2138 | 1480 for(y=0; y<h; y++){ |
2193 | 1481 int v=0; |
1482 int lt=0, t=0, rt=0; | |
1483 | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1484 if(y && prev_xc->x == 0){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1485 rt= prev_xc->coeff; |
2193 | 1486 } |
2138 | 1487 for(x=0; x<w; x++){ |
2193 | 1488 int p=0; |
1489 const int l= v; | |
2967 | 1490 |
2193 | 1491 lt= t; t= rt; |
1492 | |
2194 | 1493 if(y){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1494 if(prev_xc->x <= x) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1495 prev_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1496 if(prev_xc->x == x + 1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1497 rt= prev_xc->coeff; |
2194 | 1498 else |
1499 rt=0; | |
1500 } | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1501 if(parent_xc){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1502 if(x>>1 > parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1503 parent_xc++; |
2192 | 1504 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1505 if(x>>1 == parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1506 p= parent_xc->coeff; |
2194 | 1507 } |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1508 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1509 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1510 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
2144 | 1511 |
2335 | 1512 v=get_rac(&s->c, &b->state[0][context]); |
2605 | 1513 if(v){ |
1514 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1515 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
2967 | 1516 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1517 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1518 (xc++)->coeff= v; |
2605 | 1519 } |
2138 | 1520 }else{ |
1521 if(!run){ | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1522 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1523 else run= INT_MAX; |
2605 | 1524 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
1525 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
2967 | 1526 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1527 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1528 (xc++)->coeff= v; |
2138 | 1529 }else{ |
2606 | 1530 int max_run; |
2138 | 1531 run--; |
1532 v=0; | |
2191 | 1533 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1534 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
2606 | 1535 else max_run= FFMIN(run, w-x-1); |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1536 if(parent_xc) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1537 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); |
2606 | 1538 x+= max_run; |
1539 run-= max_run; | |
2138 | 1540 } |
1541 } | |
2192 | 1542 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1543 (xc++)->x= w+1; //end marker |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1544 prev_xc= prev2_xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1545 prev2_xc= xc; |
2967 | 1546 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1547 if(parent_xc){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1548 if(y&1){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1549 while(parent_xc->x != parent->width+1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1550 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1551 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1552 prev_parent_xc= parent_xc; |
2192 | 1553 }else{ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1554 parent_xc= prev_parent_xc; |
2138 | 1555 } |
1556 } | |
1557 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1558 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1559 (xc++)->x= w+1; //end marker |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1560 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1561 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1562 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1563 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1564 const int w= b->width; |
2893 | 1565 int y; |
4594 | 1566 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 1567 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1568 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1569 int new_index = 0; |
2967 | 1570 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1571 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1572 qadd= 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1573 qmul= 1<<QEXPSHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1574 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1575 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1576 /* If we are on the second or later slice, restore our index. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1577 if (start_y != 0) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1578 new_index = save_state[0]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1579 |
2967 | 1580 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1581 for(y=start_y; y<h; y++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1582 int x = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1583 int v; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1584 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1585 memset(line, 0, b->width*sizeof(IDWTELEM)); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1586 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1587 x = b->x_coeff[new_index++].x; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
1588 while(x < w){ |
2596 | 1589 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1590 register int u= -(v&1); | |
1591 line[x] = (t^u) - u; | |
1592 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1593 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1594 x = b->x_coeff[new_index++].x; |
2138 | 1595 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1596 } |
2967 | 1597 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1598 /* Save our variables for the next slice. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1599 save_state[0] = new_index; |
2967 | 1600 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1601 return; |
2138 | 1602 } |
1603 | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
1604 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts |
2138 | 1605 int plane_index, level, orientation; |
1606 | |
2199 | 1607 for(plane_index=0; plane_index<3; plane_index++){ |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
1608 for(level=0; level<MAX_DECOMPOSITIONS; level++){ |
2138 | 1609 for(orientation=level ? 1:0; orientation<4; orientation++){ |
2335 | 1610 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
2138 | 1611 } |
1612 } | |
1613 } | |
2335 | 1614 memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1615 memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1616 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1617 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1618 static int alloc_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1619 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1620 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); |
2967 | 1621 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1622 s->b_width = w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1623 s->b_height= h; |
2967 | 1624 |
9370 | 1625 av_free(s->block); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1626 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1627 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1628 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1629 |
2335 | 1630 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1631 uint8_t *bytestream= d->bytestream; | |
1632 uint8_t *bytestream_start= d->bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1633 *d= *s; |
2335 | 1634 d->bytestream= bytestream; |
1635 d->bytestream_start= bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1636 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1637 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1638 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1639 static int pix_sum(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1640 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1641 int s, i, j; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1642 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1643 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1644 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1645 for (j = 0; j < w; j++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1646 s += pix[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1647 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1648 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1649 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1650 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1651 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1652 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1653 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1654 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1655 static int pix_norm1(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1656 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1657 int s, i, j; |
4179 | 1658 uint32_t *sq = ff_squareTbl + 256; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1659 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1660 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1661 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1662 for (j = 0; j < w; j ++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1663 s += sq[pix[0]]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1664 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1665 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1666 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1667 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1668 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1669 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1670 |
3314 | 1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1672 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1673 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1674 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1675 const int block_w= 1<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1676 BlockNode block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1677 int i,j; |
2967 | 1678 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1679 block.color[0]= l; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1680 block.color[1]= cb; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1681 block.color[2]= cr; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1682 block.mx= mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1683 block.my= my; |
3314 | 1684 block.ref= ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1685 block.type= type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1686 block.level= level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1687 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1688 for(j=0; j<block_w; j++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1689 for(i=0; i<block_w; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1690 s->block[index + i + j*w]= block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1691 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1692 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1693 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1694 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1695 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1696 const int offset[3]= { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1697 y*c-> stride + x, |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1698 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1699 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1700 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1701 int i; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1702 for(i=0; i<3; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1703 c->src[0][i]= src [i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1704 c->ref[0][i]= ref [i] + offset[i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1705 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1706 assert(!ref_index); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1707 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1708 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1710 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1711 if(s->ref_frames == 1){ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1712 *mx = mid_pred(left->mx, top->mx, tr->mx); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1713 *my = mid_pred(left->my, top->my, tr->my); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1714 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1715 const int *scale = scale_mv_ref[ref]; |
4407
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1716 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1717 (top ->mx * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1718 (tr ->mx * scale[tr ->ref] + 128) >>8); |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1719 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1720 (top ->my * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1721 (tr ->my * scale[tr ->ref] + 128) >>8); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1722 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1723 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1724 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1725 //FIXME copy&paste |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1726 #define P_LEFT P[1] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1727 #define P_TOP P[2] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1728 #define P_TOPRIGHT P[3] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1729 #define P_MEDIAN P[4] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1730 #define P_MV1 P[9] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1731 #define FLAG_QPEL 1 //must be 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1732 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1734 uint8_t p_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1735 uint8_t i_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1736 uint8_t p_state[sizeof(s->block_state)]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1737 uint8_t i_state[sizeof(s->block_state)]; |
2335 | 1738 RangeCoder pc, ic; |
1739 uint8_t *pbbak= s->c.bytestream; | |
1740 uint8_t *pbbak_start= s->c.bytestream_start; | |
5082 | 1741 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1742 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1743 const int h= s->b_height << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1744 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1745 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1746 const int block_w= 1<<(LOG2_MB_SIZE - level); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1747 int trx= (x+1)<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1748 int try= (y+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1749 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1750 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1751 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1752 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1753 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1754 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1755 int pl = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1756 int pcb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1757 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1758 int pmx, pmy; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1759 int mx=0, my=0; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1760 int l,cr,cb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1761 const int stride= s->current_picture.linesize[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1762 const int uvstride= s->current_picture.linesize[1]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1763 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1764 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1765 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1766 int P[10][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1767 int16_t last_mv[3][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1768 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1769 const int shift= 1+qpel; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1770 MotionEstContext *c= &s->m.me; |
3314 | 1771 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1772 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
1773 int my_context= av_log2(2*FFABS(left->my - top->my)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1774 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
3314 | 1775 int ref, best_ref, ref_score, ref_mx, ref_my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1776 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1777 assert(sizeof(s->block_state) >= 256); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1778 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1779 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1780 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1781 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1782 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1783 // clip predictors / edge ? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1784 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1785 P_LEFT[0]= left->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1786 P_LEFT[1]= left->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1787 P_TOP [0]= top->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1788 P_TOP [1]= top->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1789 P_TOPRIGHT[0]= tr->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1790 P_TOPRIGHT[1]= tr->my; |
2967 | 1791 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1792 last_mv[0][0]= s->block[index].mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1793 last_mv[0][1]= s->block[index].my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1794 last_mv[1][0]= right->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1795 last_mv[1][1]= right->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1796 last_mv[2][0]= bottom->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1797 last_mv[2][1]= bottom->my; |
2967 | 1798 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1799 s->m.mb_stride=2; |
2967 | 1800 s->m.mb_x= |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1801 s->m.mb_y= 0; |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1802 c->skip= 0; |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1803 |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1804 assert(c-> stride == stride); |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1805 assert(c->uvstride == uvstride); |
2967 | 1806 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1807 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1808 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1809 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1810 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; |
2967 | 1811 |
2206 | 1812 c->xmin = - x*block_w - 16+2; |
1813 c->ymin = - y*block_w - 16+2; | |
1814 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
1815 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1816 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1817 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); |
2967 | 1818 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1819 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1820 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1821 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1822 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1823 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1824 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1825 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1826 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1827 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1828 if (!y) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1829 c->pred_x= P_LEFT[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1830 c->pred_y= P_LEFT[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1831 } else { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1832 c->pred_x = P_MEDIAN[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1833 c->pred_y = P_MEDIAN[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1834 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1835 |
3314 | 1836 score= INT_MAX; |
1837 best_ref= 0; | |
1838 for(ref=0; ref<s->ref_frames; ref++){ | |
1839 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); | |
1840 | |
1841 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, | |
1842 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
1843 | |
1844 assert(ref_mx >= c->xmin); | |
1845 assert(ref_mx <= c->xmax); | |
1846 assert(ref_my >= c->ymin); | |
1847 assert(ref_my <= c->ymax); | |
1848 | |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1849 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
3314 | 1850 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); |
1851 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; | |
1852 if(s->ref_mvs[ref]){ | |
1853 s->ref_mvs[ref][index][0]= ref_mx; | |
1854 s->ref_mvs[ref][index][1]= ref_my; | |
1855 s->ref_scores[ref][index]= ref_score; | |
1856 } | |
1857 if(score > ref_score){ | |
1858 score= ref_score; | |
1859 best_ref= ref; | |
1860 mx= ref_mx; | |
1861 my= ref_my; | |
1862 } | |
1863 } | |
5127 | 1864 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 |
2967 | 1865 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1866 // subpel search |
5085 | 1867 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1868 pc= s->c; |
2335 | 1869 pc.bytestream_start= |
1870 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1871 memcpy(p_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1872 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1873 if(level!=s->block_max_depth) |
2335 | 1874 put_rac(&pc, &p_state[4 + s_context], 1); |
1875 put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
3314 | 1876 if(s->ref_frames > 1) |
1877 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1878 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); |
3314 | 1879 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); |
1880 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); | |
2335 | 1881 p_len= pc.bytestream - pc.bytestream_start; |
5082 | 1882 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1883 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1884 block_s= block_w*block_w; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1885 sum = pix_sum(current_data[0], stride, block_w); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1886 l= (sum + block_s/2)/block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1887 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
2967 | 1888 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1889 block_s= block_w*block_w>>2; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1890 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1891 cb= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1892 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1893 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1894 cr= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1895 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1896 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1897 ic= s->c; |
2335 | 1898 ic.bytestream_start= |
1899 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1900 memcpy(i_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1901 if(level!=s->block_max_depth) |
2335 | 1902 put_rac(&ic, &i_state[4 + s_context], 1); |
1903 put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1904 put_symbol(&ic, &i_state[32], l-pl , 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1905 put_symbol(&ic, &i_state[64], cb-pcb, 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1906 put_symbol(&ic, &i_state[96], cr-pcr, 1); |
2335 | 1907 i_len= ic.bytestream - ic.bytestream_start; |
5082 | 1908 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1909 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1910 // assert(score==256*256*256*64-1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1911 assert(iscore < 255*255*256 + s->lambda2*10); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1912 assert(iscore >= 0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1913 assert(l>=0 && l<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1914 assert(pl>=0 && pl<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1915 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1916 if(level==0){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1917 int varc= iscore >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1918 int vard= score >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1919 if (vard <= 64 || vard < varc) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1920 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1921 else |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1922 c->scene_change_score+= s->m.qscale; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1923 } |
2967 | 1924 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1925 if(level!=s->block_max_depth){ |
2335 | 1926 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1927 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1929 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1931 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead |
2967 | 1932 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1933 if(score2 < score && score2 < iscore) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1934 return score2; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1935 } |
2967 | 1936 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1937 if(iscore < score){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1938 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2335 | 1939 memcpy(pbbak, i_buffer, i_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1940 s->c= ic; |
2335 | 1941 s->c.bytestream_start= pbbak_start; |
1942 s->c.bytestream= pbbak + i_len; | |
3314 | 1943 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1944 memcpy(s->block_state, i_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1945 return iscore; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1946 }else{ |
2335 | 1947 memcpy(pbbak, p_buffer, p_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1948 s->c= pc; |
2335 | 1949 s->c.bytestream_start= pbbak_start; |
1950 s->c.bytestream= pbbak + p_len; | |
3314 | 1951 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1952 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1953 return score; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1954 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1955 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1956 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1958 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1959 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1960 }else{ |
3314 | 1961 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1962 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1963 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1964 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1966 const int w= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1967 const int rem_depth= s->block_max_depth - level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1968 const int index= (x + y*w) << rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1969 int trx= (x+1)<<rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1970 BlockNode *b= &s->block[index]; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1971 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1972 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1973 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1974 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1975 int pl = left->color[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1976 int pcb= left->color[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1977 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1978 int pmx, pmy; |
3314 | 1979 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1980 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; |
1981 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1982 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1983 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1984 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1985 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1986 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1987 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1988 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1989 if(level!=s->block_max_depth){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1990 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ |
2995
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1991 put_rac(&s->c, &s->block_state[4 + s_context], 1); |
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1992 }else{ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1993 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1998 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1999 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2000 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2001 if(b->type & BLOCK_INTRA){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2002 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2003 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2004 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2005 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2006 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); |
3314 | 2007 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2008 }else{ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2009 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2010 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); |
3314 | 2011 if(s->ref_frames > 1) |
2012 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2013 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2014 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
3314 | 2015 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2016 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2017 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2018 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2020 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2021 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2022 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2023 int trx= (x+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2024 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2025 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2028 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
2967 | 2029 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2030 if(s->keyframe){ |
3314 | 2031 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2032 return; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2033 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2034 |
2335 | 2035 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
4332 | 2036 int type, mx, my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2037 int l = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2038 int cb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2039 int cr= left->color[2]; |
3314 | 2040 int ref = 0; |
2041 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
4001 | 2042 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); |
2043 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
2967 | 2044 |
2335 | 2045 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2046 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2047 if(type){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2048 pred_mv(s, &mx, &my, 0, left, top, tr); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2049 l += get_symbol(&s->c, &s->block_state[32], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2050 cb+= get_symbol(&s->c, &s->block_state[64], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2051 cr+= get_symbol(&s->c, &s->block_state[96], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2052 }else{ |
3314 | 2053 if(s->ref_frames > 1) |
2054 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2055 pred_mv(s, &mx, &my, ref, left, top, tr); |
3314 | 2056 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); |
2057 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2058 } |
3314 | 2059 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2060 }else{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2065 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2066 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2067 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2068 static void encode_blocks(SnowContext *s, int search){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2069 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2070 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2071 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2072 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2073 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2074 iterative_me(s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2075 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2076 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
2077 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
2422 | 2078 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2079 return; | |
2080 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2081 for(x=0; x<w; x++){ |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2082 if(s->avctx->me_method == ME_ITER || !search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2083 encode_q_branch2(s, 0, x, y); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2084 else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2085 encode_q_branch (s, 0, x, y); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2086 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2087 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2088 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2089 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2090 static void decode_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2091 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2092 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2093 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2094 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2095 for(y=0; y<h; y++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2096 for(x=0; x<w; x++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2097 decode_q_branch(s, 0, x, y); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2098 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2099 } |
2138 | 2100 } |
2101 | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2102 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
7622 | 2103 static const uint8_t weight[64]={ |
5648 | 2104 8,7,6,5,4,3,2,1, |
2105 7,7,0,0,0,0,0,1, | |
2106 6,0,6,0,0,0,2,0, | |
2107 5,0,0,5,0,3,0,0, | |
2108 4,0,0,0,4,0,0,0, | |
2109 3,0,0,5,0,3,0,0, | |
2110 2,0,6,0,0,0,2,0, | |
2111 1,7,0,0,0,0,0,1, | |
2112 }; | |
2113 | |
7622 | 2114 static const uint8_t brane[256]={ |
5648 | 2115 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, |
2116 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
2117 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
2118 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
2119 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
2120 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
2121 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
2122 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
2123 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
2124 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
2125 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
2126 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
2127 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
2128 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
2129 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
2130 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
2131 }; | |
2132 | |
7622 | 2133 static const uint8_t needs[16]={ |
5648 | 2134 0,1,0,0, |
2135 2,4,2,0, | |
2136 0,1,0,0, | |
2137 15 | |
2138 }; | |
2139 | |
2140 int x, y, b, r, l; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2141 int16_t tmpIt [64*(32+HTAPS_MAX)]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2142 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; |
5648 | 2143 int16_t *tmpI= tmpIt; |
2144 uint8_t *tmp2= tmp2t[0]; | |
6205 | 2145 const uint8_t *hpel[11]; |
5648 | 2146 assert(dx<16 && dy<16); |
2147 r= brane[dx + 16*dy]&15; | |
2148 l= brane[dx + 16*dy]>>4; | |
2149 | |
2150 b= needs[l] | needs[r]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2151 if(p && !p->diag_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2152 b= 15; |
5648 | 2153 |
2154 if(b&5){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2155 for(y=0; y < b_h+HTAPS_MAX-1; y++){ |
5649 | 2156 for(x=0; x < b_w; x++){ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2157 int a_1=src[x + HTAPS_MAX/2-4]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2158 int a0= src[x + HTAPS_MAX/2-3]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2159 int a1= src[x + HTAPS_MAX/2-2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2160 int a2= src[x + HTAPS_MAX/2-1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2161 int a3= src[x + HTAPS_MAX/2+0]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2162 int a4= src[x + HTAPS_MAX/2+1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2163 int a5= src[x + HTAPS_MAX/2+2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2164 int a6= src[x + HTAPS_MAX/2+3]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2165 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2166 if(!p || p->fast_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2167 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2168 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2169 am= (am+16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2170 }else{ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2171 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2172 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2173 am= (am+32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2174 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2175 |
5649 | 2176 if(am&(~255)) am= ~(am>>31); |
2177 tmp2[x]= am; | |
2178 } | |
2179 tmpI+= 64; | |
2180 tmp2+= stride; | |
2181 src += stride; | |
2138 | 2182 } |
5649 | 2183 src -= stride*y; |
5648 | 2184 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2185 src += HTAPS_MAX/2 - 1; |
5648 | 2186 tmp2= tmp2t[1]; |
2187 | |
2188 if(b&2){ | |
5649 | 2189 for(y=0; y < b_h; y++){ |
2190 for(x=0; x < b_w+1; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2191 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2192 int a0= src[x + (HTAPS_MAX/2-3)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2193 int a1= src[x + (HTAPS_MAX/2-2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2194 int a2= src[x + (HTAPS_MAX/2-1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2195 int a3= src[x + (HTAPS_MAX/2+0)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2196 int a4= src[x + (HTAPS_MAX/2+1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2197 int a5= src[x + (HTAPS_MAX/2+2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2198 int a6= src[x + (HTAPS_MAX/2+3)*stride]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2199 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2200 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2201 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2202 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2203 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2204 |
5649 | 2205 if(am&(~255)) am= ~(am>>31); |
2206 tmp2[x]= am; | |
2207 } | |
2208 src += stride; | |
2209 tmp2+= stride; | |
5648 | 2210 } |
5649 | 2211 src -= stride*y; |
5648 | 2212 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2213 src += stride*(HTAPS_MAX/2 - 1); |
5648 | 2214 tmp2= tmp2t[2]; |
2215 tmpI= tmpIt; | |
2216 if(b&4){ | |
2217 for(y=0; y < b_h; y++){ | |
2218 for(x=0; x < b_w; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2219 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2220 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2221 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2222 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2223 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2224 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2225 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2226 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2227 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2228 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2229 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2230 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2231 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; |
5648 | 2232 if(am&(~255)) am= ~(am>>31); |
2233 tmp2[x]= am; | |
2234 } | |
2235 tmpI+= 64; | |
2236 tmp2+= stride; | |
2138 | 2237 } |
5648 | 2238 } |
2239 | |
2240 hpel[ 0]= src; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2241 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); |
5648 | 2242 hpel[ 2]= src + 1; |
2243 | |
2244 hpel[ 4]= tmp2t[1]; | |
2245 hpel[ 5]= tmp2t[2]; | |
2246 hpel[ 6]= tmp2t[1] + 1; | |
2247 | |
2248 hpel[ 8]= src + stride; | |
2249 hpel[ 9]= hpel[1] + stride; | |
2250 hpel[10]= hpel[8] + 1; | |
2251 | |
2252 if(b==15){ | |
6205 | 2253 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; |
2254 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
2255 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
2256 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
5648 | 2257 dx&=7; |
2258 dy&=7; | |
2259 for(y=0; y < b_h; y++){ | |
2260 for(x=0; x < b_w; x++){ | |
2261 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
2262 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
2263 } | |
2264 src1+=stride; | |
2265 src2+=stride; | |
2266 src3+=stride; | |
2267 src4+=stride; | |
2268 dst +=stride; | |
2269 } | |
2270 }else{ | |
6205 | 2271 const uint8_t *src1= hpel[l]; |
2272 const uint8_t *src2= hpel[r]; | |
5648 | 2273 int a= weight[((dx&7) + (8*(dy&7)))]; |
2274 int b= 8-a; | |
2275 for(y=0; y < b_h; y++){ | |
2276 for(x=0; x < b_w; x++){ | |
2277 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
2278 } | |
2279 src1+=stride; | |
2280 src2+=stride; | |
2281 dst +=stride; | |
2282 } | |
2138 | 2283 } |
2284 } | |
2285 | |
2286 #define mca(dx,dy,b_w)\ | |
5254 | 2287 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2288 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ |
2138 | 2289 assert(h==b_w);\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2290 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ |
2138 | 2291 } |
2292 | |
2293 mca( 0, 0,16) | |
2294 mca( 8, 0,16) | |
2295 mca( 0, 8,16) | |
2296 mca( 8, 8,16) | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2297 mca( 0, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2298 mca( 8, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2299 mca( 0, 8,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2300 mca( 8, 8,8) |
2138 | 2301 |
3314 | 2302 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2303 if(block->type & BLOCK_INTRA){ |
2206 | 2304 int x, y; |
3018 | 2305 const int color = block->color[plane_index]; |
2306 const int color4= color*0x01010101; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2307 if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2308 for(y=0; y < b_h; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2309 *(uint32_t*)&dst[0 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2310 *(uint32_t*)&dst[4 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2311 *(uint32_t*)&dst[8 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2312 *(uint32_t*)&dst[12+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2313 *(uint32_t*)&dst[16+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2314 *(uint32_t*)&dst[20+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2315 *(uint32_t*)&dst[24+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2316 *(uint32_t*)&dst[28+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2317 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2318 }else if(b_w==16){ |
3018 | 2319 for(y=0; y < b_h; y++){ |
2320 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2321 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2322 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2323 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2324 } | |
2325 }else if(b_w==8){ | |
2326 for(y=0; y < b_h; y++){ | |
2327 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2328 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2329 } | |
2330 }else if(b_w==4){ | |
2331 for(y=0; y < b_h; y++){ | |
2332 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2333 } | |
2334 }else{ | |
2335 for(y=0; y < b_h; y++){ | |
2336 for(x=0; x < b_w; x++){ | |
2337 dst[x + y*stride]= color; | |
2338 } | |
2138 | 2339 } |
2340 } | |
2341 }else{ | |
3314 | 2342 uint8_t *src= s->last_picture[block->ref].data[plane_index]; |
2206 | 2343 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; |
2344 int mx= block->mx*scale; | |
2345 int my= block->my*scale; | |
2223 | 2346 const int dx= mx&15; |
2347 const int dy= my&15; | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2348 const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2349 sx += (mx>>4) - (HTAPS_MAX/2-1); |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2350 sy += (my>>4) - (HTAPS_MAX/2-1); |
2206 | 2351 src += sx + sy*stride; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2352 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2353 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2354 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); |
2206 | 2355 src= tmp + MB_SIZE; |
2138 | 2356 } |
3189 | 2357 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2358 // assert(!(b_w&(b_w-1))); | |
3018 | 2359 assert(b_w>1 && b_h>1); |
6206
ea591bc7522a
add parenthesis, fix warning: snow.c:2413: warning: suggest parentheses around && within ||
bcoudurier
parents:
6205
diff
changeset
|
2360 assert((tab_index>=0 && tab_index<4) || b_w==32); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2361 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2362 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2363 else if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2364 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2365 for(y=0; y<b_h; y+=16){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2366 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2367 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2368 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2369 }else if(b_w==b_h) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); |
3018 | 2371 else if(b_w==2*b_h){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2373 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); |
3018 | 2374 }else{ |
2375 assert(2*b_w==b_h); | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); |
3018 | 2378 } |
2138 | 2379 } |
2380 } | |
2381 | |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2382 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2383 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2384 int y, x; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2385 IDWTELEM * dst; |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2386 for(y=0; y<b_h; y++){ |
5409 | 2387 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2388 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2389 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2390 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2391 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2392 dst = slice_buffer_get_line(sb, src_y + y); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2393 for(x=0; x<b_w; x++){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2394 int v= obmc1[x] * block[3][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2395 +obmc2[x] * block[2][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2396 +obmc3[x] * block[1][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2397 +obmc4[x] * block[0][x + y*src_stride]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2398 |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2399 v <<= 8 - LOG2_OBMC_MAX; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2400 if(FRAC_BITS != 8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2401 v >>= 8 - FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2402 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2403 if(add){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2404 v += dst[x + src_x]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2405 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2406 if(v&(~255)) v= ~(v>>31); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2407 dst8[x + y*src_stride] = v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2408 }else{ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2409 dst[x + src_x] -= v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2410 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2411 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2412 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2413 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2414 |
6412 | 2415 //FIXME name cleanup (b_w, block_w, b_width stuff) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2416 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2417 const int b_width = s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2418 const int b_height= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2419 const int b_stride= b_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2420 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2421 BlockNode *rt= lt+1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2422 BlockNode *lb= lt+b_stride; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2423 BlockNode *rb= lb+1; |
2967 | 2424 uint8_t *block[4]; |
2842 | 2425 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
2426 uint8_t *tmp = s->scratchbuf; |
2842 | 2427 uint8_t *ptmp; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2428 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2429 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2430 if(b_x<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2431 lt= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2432 lb= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2433 }else if(b_x + 1 >= b_width){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2434 rt= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2435 rb= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2436 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2437 if(b_y<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2438 lt= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2439 rt= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2440 }else if(b_y + 1 >= b_height){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2441 lb= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2442 rb= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2443 } |
2967 | 2444 |
6412 | 2445 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2446 obmc -= src_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2447 b_w += src_x; |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2448 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2449 dst -= src_x; |
2206 | 2450 src_x=0; |
2451 }else if(src_x + b_w > w){ | |
2452 b_w = w - src_x; | |
2453 } | |
2454 if(src_y<0){ | |
2455 obmc -= src_y*obmc_stride; | |
2456 b_h += src_y; | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2457 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2458 dst -= src_y*dst_stride; |
2206 | 2459 src_y=0; |
2460 }else if(src_y + b_h> h){ | |
2461 b_h = h - src_y; | |
2462 } | |
2967 | 2463 |
2206 | 2464 if(b_w<=0 || b_h<=0) return; |
2465 | |
6414 | 2466 assert(src_stride > 2*MB_SIZE + 5); |
2467 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2468 if(!sliced && offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2469 dst += src_x + src_y*dst_stride; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2470 dst8+= src_x + src_y*src_stride; |
2206 | 2471 // src += src_x + src_y*src_stride; |
2472 | |
2842 | 2473 ptmp= tmp + 3*tmp_step; |
2474 block[0]= ptmp; | |
2475 ptmp+=tmp_step; | |
3314 | 2476 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
2206 | 2477 |
2478 if(same_block(lt, rt)){ | |
2479 block[1]= block[0]; | |
2480 }else{ | |
2842 | 2481 block[1]= ptmp; |
2482 ptmp+=tmp_step; | |
3314 | 2483 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2206 | 2484 } |
2967 | 2485 |
2206 | 2486 if(same_block(lt, lb)){ |
2487 block[2]= block[0]; | |
2488 }else if(same_block(rt, lb)){ | |
2489 block[2]= block[1]; | |
2490 }else{ | |
2842 | 2491 block[2]= ptmp; |
2492 ptmp+=tmp_step; | |
3314 | 2493 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2206 | 2494 } |
2495 | |
2496 if(same_block(lt, rb) ){ | |
2497 block[3]= block[0]; | |
2498 }else if(same_block(rt, rb)){ | |
2499 block[3]= block[1]; | |
2500 }else if(same_block(lb, rb)){ | |
2501 block[3]= block[2]; | |
2502 }else{ | |
2842 | 2503 block[3]= ptmp; |
3314 | 2504 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2206 | 2505 } |
2506 #if 0 | |
2507 for(y=0; y<b_h; y++){ | |
2508 for(x=0; x<b_w; x++){ | |
2509 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2510 if(add) dst[x + y*dst_stride] += v; | |
2511 else dst[x + y*dst_stride] -= v; | |
2512 } | |
2513 } | |
2514 for(y=0; y<b_h; y++){ | |
2515 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2516 for(x=0; x<b_w; x++){ | |
2517 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2518 if(add) dst[x + y*dst_stride] += v; | |
2519 else dst[x + y*dst_stride] -= v; | |
2520 } | |
2521 } | |
2522 for(y=0; y<b_h; y++){ | |
2523 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2524 for(x=0; x<b_w; x++){ | |
2525 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2526 if(add) dst[x + y*dst_stride] += v; | |
2527 else dst[x + y*dst_stride] -= v; | |
2528 } | |
2529 } | |
2530 for(y=0; y<b_h; y++){ | |
2531 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2532 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2533 for(x=0; x<b_w; x++){ | |
2534 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2535 if(add) dst[x + y*dst_stride] += v; | |
2536 else dst[x + y*dst_stride] -= v; | |
2537 } | |
2538 } | |
2539 #else | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2540 if(sliced){ |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2541 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
6475 | 2542 }else{ |
2543 for(y=0; y<b_h; y++){ | |
2544 //FIXME ugly misuse of obmc_stride | |
2545 const uint8_t *obmc1= obmc + y*obmc_stride; | |
2546 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2547 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2548 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2549 for(x=0; x<b_w; x++){ | |
2550 int v= obmc1[x] * block[3][x + y*src_stride] | |
2551 +obmc2[x] * block[2][x + y*src_stride] | |
2552 +obmc3[x] * block[1][x + y*src_stride] | |
2553 +obmc4[x] * block[0][x + y*src_stride]; | |
2554 | |
2555 v <<= 8 - LOG2_OBMC_MAX; | |
2556 if(FRAC_BITS != 8){ | |
2557 v >>= 8 - FRAC_BITS; | |
2558 } | |
2559 if(add){ | |
2560 v += dst[x + y*dst_stride]; | |
2561 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2562 if(v&(~255)) v= ~(v>>31); | |
2563 dst8[x + y*src_stride] = v; | |
2564 }else{ | |
2565 dst[x + y*dst_stride] -= v; | |
2566 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2567 } |
2206 | 2568 } |
2569 } | |
5910 | 2570 #endif /* 0 */ |
2206 | 2571 } |
2572 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2573 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2574 Plane *p= &s->plane[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2575 const int mb_w= s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2576 const int mb_h= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2577 int x, y, mb_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2578 int block_size = MB_SIZE >> s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2579 int block_w = plane_index ? block_size/2 : block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2580 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2581 int obmc_stride= plane_index ? block_size : 2*block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2582 int ref_stride= s->current_picture.linesize[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2583 uint8_t *dst8= s->current_picture.data[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2584 int w= p->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2585 int h= p->height; |
2967 | 2586 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2587 if(s->keyframe || (s->avctx->debug&512)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2588 if(mb_y==mb_h) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2589 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2590 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2591 if(add){ |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2592 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2593 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2594 IDWTELEM * line = sb->line[y]; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2595 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2596 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2597 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2598 v >>= FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2599 if(v&(~255)) v= ~(v>>31); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2600 dst8[x + y*ref_stride]= v; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2601 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2602 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2603 }else{ |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2604 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2605 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2606 IDWTELEM * line = sb->line[y]; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2607 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2608 line[x] -= 128 << FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2609 // buf[x + y*w]-= 128<<FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2610 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2611 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2612 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2613 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2614 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2615 } |
2967 | 2616 |
6475 | 2617 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2618 add_yblock(s, 1, sb, old_buffer, dst8, obmc, | |
2619 block_w*mb_x - block_w/2, | |
2620 block_w*mb_y - block_w/2, | |
2621 block_w, block_w, | |
2622 w, h, | |
2623 w, ref_stride, obmc_stride, | |
2624 mb_x - 1, mb_y - 1, | |
2625 add, 0, plane_index); | |
2626 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2627 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2628 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2629 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2138 | 2630 Plane *p= &s->plane[plane_index]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2631 const int mb_w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2632 const int mb_h= s->b_height << s->block_max_depth; |
2562 | 2633 int x, y, mb_x; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2634 int block_size = MB_SIZE >> s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2635 int block_w = plane_index ? block_size/2 : block_size; |
2206 | 2636 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2637 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2638 int ref_stride= s->current_picture.linesize[plane_index]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2639 uint8_t *dst8= s->current_picture.data[plane_index]; |
2138 | 2640 int w= p->width; |
2641 int h= p->height; | |
2967 | 2642 |
2206 | 2643 if(s->keyframe || (s->avctx->debug&512)){ |
2562 | 2644 if(mb_y==mb_h) |
2645 return; | |
2646 | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2647 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2648 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2649 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2650 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2651 v >>= FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2652 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2653 dst8[x + y*ref_stride]= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2654 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2655 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2656 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2657 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2658 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2659 buf[x + y*w]-= 128<<FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2660 } |
2206 | 2661 } |
2138 | 2662 } |
2206 | 2663 |
2664 return; | |
2138 | 2665 } |
2967 | 2666 |
6414 | 2667 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2668 add_yblock(s, 0, NULL, buf, dst8, obmc, | |
2669 block_w*mb_x - block_w/2, | |
2670 block_w*mb_y - block_w/2, | |
2671 block_w, block_w, | |
2672 w, h, | |
2673 w, ref_stride, obmc_stride, | |
2674 mb_x - 1, mb_y - 1, | |
2675 add, 1, plane_index); | |
2676 } | |
2562 | 2677 } |
2678 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2679 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2562 | 2680 const int mb_h= s->b_height << s->block_max_depth; |
2681 int mb_y; | |
2682 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2683 predict_slice(s, buf, plane_index, add, mb_y); | |
2138 | 2684 } |
2685 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2686 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2687 int i, x2, y2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2688 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2689 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2690 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2691 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2692 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2693 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2694 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2695 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2696 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2697 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2698 const int h= p->height; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2699 int index= mb_x + mb_y*b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2700 BlockNode *b= &s->block[index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2701 BlockNode backup= *b; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2702 int ab=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2703 int aa=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2704 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2705 b->type|= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2706 b->color[plane_index]= 0; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2707 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2708 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2709 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2710 int mb_x2= mb_x + (i &1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2711 int mb_y2= mb_y + (i>>1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2712 int x= block_w*mb_x2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2713 int y= block_w*mb_y2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2714 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2715 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2716 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2717 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2718 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2719 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2720 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2721 int obmc_v= obmc[index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2722 int d; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2723 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2724 if(x<0) obmc_v += obmc[index + block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2725 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2726 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
6412 | 2727 //FIXME precalculate this or simplify it somehow else |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2728 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2729 d = -dst[index] + (1<<(FRAC_BITS-1)); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2730 dst[index] = d; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2731 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; |
6412 | 2732 aa += obmc_v * obmc_v; //FIXME precalculate this |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2733 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2734 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2735 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2736 *b= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2737 |
5127 | 2738 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2739 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2740 |
3051 | 2741 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
2742 const int b_stride = s->b_width << s->block_max_depth; | |
2743 const int b_height = s->b_height<< s->block_max_depth; | |
2744 int index= x + y*b_stride; | |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2745 const BlockNode *b = &s->block[index]; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2746 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2747 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2748 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2749 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; |
3051 | 2750 int dmx, dmy; |
4001 | 2751 // int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
2752 // int my_context= av_log2(2*FFABS(left->my - top->my)); | |
3051 | 2753 |
2754 if(x<0 || x>=b_stride || y>=b_height) | |
2755 return 0; | |
2756 /* | |
2757 1 0 0 | |
2758 01X 1-2 1 | |
2759 001XX 3-6 2-3 | |
2760 0001XXX 7-14 4-7 | |
2761 00001XXXX 15-30 8-15 | |
2762 */ | |
2763 //FIXME try accurate rate | |
6412 | 2764 //FIXME intra and inter predictors if surrounding blocks are not the same type |
3051 | 2765 if(b->type & BLOCK_INTRA){ |
4001 | 2766 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) |
2767 + av_log2(2*FFABS(left->color[1] - b->color[1])) | |
2768 + av_log2(2*FFABS(left->color[2] - b->color[2]))); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2769 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2770 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2771 dmx-= b->mx; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2772 dmy-= b->my; |
4001 | 2773 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda |
2774 + av_log2(2*FFABS(dmy)) | |
3314 | 2775 + av_log2(2*b->ref)); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2776 } |
3051 | 2777 } |
2778 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2779 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2780 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2781 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2782 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2783 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2784 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2785 uint8_t *dst= s->current_picture.data[plane_index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2786 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2787 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
2788 uint8_t *cur = s->scratchbuf; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2789 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2790 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2791 const int b_height = s->b_height<< s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2792 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2793 const int h= p->height; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2794 int distortion; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2795 int rate= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2796 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2797 int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2798 int sy= block_w*mb_y - block_w/2; |
3206 | 2799 int x0= FFMAX(0,-sx); |
2800 int y0= FFMAX(0,-sy); | |
2801 int x1= FFMIN(block_w*2, w-sx); | |
2802 int y1= FFMIN(block_w*2, h-sy); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2803 int i,x,y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2804 |
3314 | 2805 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2806 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2807 for(y=y0; y<y1; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2808 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2809 const IDWTELEM *pred1 = pred + y*obmc_stride; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2810 uint8_t *cur1 = cur + y*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2811 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2812 for(x=x0; x<x1; x++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2813 #if FRAC_BITS >= LOG2_OBMC_MAX |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2814 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2815 #else |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2816 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2817 #endif |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2818 v = (v + pred1[x]) >> FRAC_BITS; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2819 if(v&(~255)) v= ~(v>>31); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2820 dst1[x] = v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2821 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2822 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2823 |
3206 | 2824 /* copy the regions where obmc[] = (uint8_t)256 */ |
2825 if(LOG2_OBMC_MAX == 8 | |
2826 && (mb_x == 0 || mb_x == b_stride-1) | |
2827 && (mb_y == 0 || mb_y == b_height-1)){ | |
2828 if(mb_x == 0) | |
2829 x1 = block_w; | |
2830 else | |
2831 x0 = block_w; | |
2832 if(mb_y == 0) | |
2833 y1 = block_w; | |
2834 else | |
2835 y0 = block_w; | |
2836 for(y=y0; y<y1; y++) | |
2837 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); | |
2838 } | |
2839 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2840 if(block_w==16){ |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2841 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2842 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ |
6412 | 2843 /* FIXME cmps overlap but do not cover the wavelet's whole support. |
2844 * So improving the score of one block is not strictly guaranteed | |
2845 * to improve the score of the whole frame, thus iterative motion | |
2846 * estimation does not always converge. */ | |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2847 if(s->avctx->me_cmp == FF_CMP_W97) |
4197 | 2848 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2849 else if(s->avctx->me_cmp == FF_CMP_W53) |
4197 | 2850 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2851 else{ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2852 distortion = 0; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2853 for(i=0; i<4; i++){ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2854 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2855 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2856 } |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2857 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2858 }else{ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2859 assert(block_w==8); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2860 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2861 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2862 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2863 if(plane_index==0){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2864 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2865 /* ..RRr |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2866 * .RXx. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2867 * rxx.. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2868 */ |
3051 | 2869 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2870 } |
3057 | 2871 if(mb_x == b_stride-2) |
2872 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2873 } |
3051 | 2874 return distortion + rate*penalty_factor; |
2875 } | |
2876 | |
2877 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
2878 int i, y2; | |
2879 Plane *p= &s->plane[plane_index]; | |
2880 const int block_size = MB_SIZE >> s->block_max_depth; | |
2881 const int block_w = plane_index ? block_size/2 : block_size; | |
2882 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2883 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2884 const int ref_stride= s->current_picture.linesize[plane_index]; | |
2885 uint8_t *dst= s->current_picture.data[plane_index]; | |
2886 uint8_t *src= s-> input_picture.data[plane_index]; | |
6426 | 2887 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst |
2888 // const has only been removed from zero_dst to suppress a warning | |
2889 static IDWTELEM zero_dst[4096]; //FIXME | |
3051 | 2890 const int b_stride = s->b_width << s->block_max_depth; |
2891 const int w= p->width; | |
2892 const int h= p->height; | |
2893 int distortion= 0; | |
2894 int rate= 0; | |
2895 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
2896 | |
2897 for(i=0; i<9; i++){ | |
2898 int mb_x2= mb_x + (i%3) - 1; | |
2899 int mb_y2= mb_y + (i/3) - 1; | |
2900 int x= block_w*mb_x2 + block_w/2; | |
2901 int y= block_w*mb_y2 + block_w/2; | |
2902 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2903 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
3051 | 2904 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
2905 | |
2906 //FIXME find a cleaner/simpler way to skip the outside stuff | |
2907 for(y2= y; y2<0; y2++) | |
2908 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2909 for(y2= h; y2<y+block_w; y2++) | |
2910 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2911 if(x<0){ | |
2912 for(y2= y; y2<y+block_w; y2++) | |
2913 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
2914 } | |
2915 if(x+block_w > w){ | |
2916 for(y2= y; y2<y+block_w; y2++) | |
2917 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
2918 } | |
2919 | |
2920 assert(block_w== 8 || block_w==16); | |
2921 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
2922 } | |
2923 | |
2924 if(plane_index==0){ | |
2925 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
2926 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
2927 | |
2928 /* ..RRRr | |
2929 * .RXXx. | |
2930 * .RXXx. | |
2931 * rxxx. | |
2932 */ | |
2933 if(merged) | |
2934 rate = get_block_bits(s, mb_x, mb_y, 2); | |
2935 for(i=merged?4:0; i<9; i++){ | |
2936 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
2937 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
2938 } | |
2939 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2940 return distortion + rate*penalty_factor; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2941 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2942 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2943 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2944 const int b_stride= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2945 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2946 BlockNode backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2947 int rd, index, value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2948 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2949 assert(mb_x>=0 && mb_y>=0); |
2994 | 2950 assert(mb_x<b_stride); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2951 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2952 if(intra){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2953 block->color[0] = p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2954 block->color[1] = p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2955 block->color[2] = p[2]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2956 block->type |= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2957 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2958 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); |
3314 | 2959 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2960 if(s->me_cache[index] == value) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2961 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2962 s->me_cache[index]= value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2963 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2964 block->mx= p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2965 block->my= p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2966 block->type &= ~BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2967 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2968 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2969 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2970 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2971 //FIXME chroma |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2972 if(rd < *best_rd){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2973 *best_rd= rd; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2974 return 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2975 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2976 *block= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2977 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2978 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2979 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2980 |
6412 | 2981 /* special case for int[2] args we discard afterwards, |
2982 * fixes compilation problem with gcc 2.95 */ | |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2983 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2984 int p[2] = {p0, p1}; |
3197 | 2985 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2986 } |
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2987 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2988 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ |
3051 | 2989 const int b_stride= s->b_width << s->block_max_depth; |
2990 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
2991 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
2992 int rd, index, value; | |
2993 | |
2994 assert(mb_x>=0 && mb_y>=0); | |
2995 assert(mb_x<b_stride); | |
2996 assert(((mb_x|mb_y)&1) == 0); | |
2997 | |
2998 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
3314 | 2999 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); |
3051 | 3000 if(s->me_cache[index] == value) |
3001 return 0; | |
3002 s->me_cache[index]= value; | |
3003 | |
3004 block->mx= p0; | |
3005 block->my= p1; | |
3314 | 3006 block->ref= ref; |
3051 | 3007 block->type &= ~BLOCK_INTRA; |
3008 block[1]= block[b_stride]= block[b_stride+1]= *block; | |
3009 | |
3010 rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3011 | |
3012 //FIXME chroma | |
3013 if(rd < *best_rd){ | |
3014 *best_rd= rd; | |
3015 return 1; | |
3016 }else{ | |
3017 block[0]= backup[0]; | |
3018 block[1]= backup[1]; | |
3019 block[b_stride]= backup[2]; | |
3020 block[b_stride+1]= backup[3]; | |
3021 return 0; | |
3022 } | |
3023 } | |
3024 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3025 static void iterative_me(SnowContext *s){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3026 int pass, mb_x, mb_y; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3027 const int b_width = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3028 const int b_height= s->b_height << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3029 const int b_stride= b_width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3030 int color[3]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3031 |
3194
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3032 { |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3033 RangeCoder r = s->c; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3034 uint8_t state[sizeof(s->block_state)]; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3035 memcpy(state, s->block_state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3036 for(mb_y= 0; mb_y<s->b_height; mb_y++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3037 for(mb_x= 0; mb_x<s->b_width; mb_x++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3038 encode_q_branch(s, 0, mb_x, mb_y); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3039 s->c = r; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3040 memcpy(s->block_state, state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3041 } |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3042 |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
3043 for(pass=0; pass<25; pass++){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3044 int change= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3045 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3046 for(mb_y= 0; mb_y<b_height; mb_y++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3047 for(mb_x= 0; mb_x<b_width; mb_x++){ |
3314 | 3048 int dia_change, i, j, ref; |
3049 int best_rd= INT_MAX, ref_rd; | |
3050 BlockNode backup, ref_b; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3051 const int index= mb_x + mb_y * b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3052 BlockNode *block= &s->block[index]; |
3324 | 3053 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; |
3054 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; | |
3055 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; | |
3056 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; | |
3057 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; | |
3058 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; | |
3059 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; | |
3060 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3061 const int b_w= (MB_SIZE >> s->block_max_depth); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3062 uint8_t obmc_edged[b_w*2][b_w*2]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3063 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3064 if(pass && (block->type & BLOCK_OPT)) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3065 continue; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3066 block->type |= BLOCK_OPT; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3067 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3068 backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3069 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3070 if(!s->me_cache_generation) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3071 memset(s->me_cache, 0, sizeof(s->me_cache)); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3072 s->me_cache_generation += 1<<22; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3073 |
6412 | 3074 //FIXME precalculate |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3075 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3076 int x, y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3077 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3078 if(mb_x==0) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3079 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3080 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3081 if(mb_x==b_stride-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3082 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3083 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3084 if(mb_y==0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3085 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3086 obmc_edged[0][x] += obmc_edged[b_w-1][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3087 for(y=1; y<b_w; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3088 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3089 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3090 if(mb_y==b_height-1){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3091 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3092 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3093 for(y=b_w; y<b_w*2-1; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3094 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3095 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3096 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3097 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3098 //skip stuff outside the picture |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
3099 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3100 uint8_t *src= s-> input_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3101 uint8_t *dst= s->current_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3102 const int stride= s->current_picture.linesize[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3103 const int block_w= MB_SIZE >> s->block_max_depth; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3104 const int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3105 const int sy= block_w*mb_y - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3106 const int w= s->plane[0].width; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3107 const int h= s->plane[0].height; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3108 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3109 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3110 for(y=sy; y<0; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3111 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3112 for(y=h; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3113 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3114 if(sx<0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3115 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3116 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3117 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3118 if(sx+block_w*2 > w){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3119 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3120 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3121 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3122 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3123 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3124 // intra(black) = neighbors' contribution to the current block |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3125 for(i=0; i<3; i++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3126 color[i]= get_dc(s, mb_x, mb_y, i); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3127 |
5127 | 3128 // get previous score (cannot be cached due to OBMC) |
3057 | 3129 if(pass > 0 && (block->type&BLOCK_INTRA)){ |
3130 int color0[3]= {block->color[0], block->color[1], block->color[2]}; | |
3131 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); | |
3132 }else | |
3197 | 3133 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); |
3134 | |
3314 | 3135 ref_b= *block; |
3136 ref_rd= best_rd; | |
3137 for(ref=0; ref < s->ref_frames; ref++){ | |
3138 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; | |
3139 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold | |
3140 continue; | |
3141 block->ref= ref; | |
3142 best_rd= INT_MAX; | |
3143 | |
3144 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); | |
3145 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); | |
3324 | 3146 if(tb) |
3314 | 3147 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); |
3324 | 3148 if(lb) |
3314 | 3149 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); |
3324 | 3150 if(rb) |
3314 | 3151 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); |
3324 | 3152 if(bb) |
3314 | 3153 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); |
3154 | |
3155 /* fullpel ME */ | |
6412 | 3156 //FIXME avoid subpel interpolation / round to nearest integer |
3314 | 3157 do{ |
3158 dia_change=0; | |
3159 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3160 for(j=0; j<i; j++){ | |
3161 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3162 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3165 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3166 } |
3314 | 3167 }while(dia_change); |
3168 /* subpel ME */ | |
3169 do{ | |
3170 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3171 dia_change=0; | |
3172 for(i=0; i<8; i++) | |
3173 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); | |
3174 }while(dia_change); | |
3175 //FIXME or try the standard 2 pass qpel or similar | |
3176 | |
3177 mvr[0][0]= block->mx; | |
3178 mvr[0][1]= block->my; | |
3179 if(ref_rd > best_rd){ | |
3180 ref_rd= best_rd; | |
3181 ref_b= *block; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3182 } |
3314 | 3183 } |
3184 best_rd= ref_rd; | |
3185 *block= ref_b; | |
2998 | 3186 #if 1 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3187 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3188 //FIXME RD style color selection |
2998 | 3189 #endif |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3190 if(!same_block(block, &backup)){ |
3324 | 3191 if(tb ) tb ->type &= ~BLOCK_OPT; |
3192 if(lb ) lb ->type &= ~BLOCK_OPT; | |
3193 if(rb ) rb ->type &= ~BLOCK_OPT; | |
3194 if(bb ) bb ->type &= ~BLOCK_OPT; | |
3195 if(tlb) tlb->type &= ~BLOCK_OPT; | |
3196 if(trb) trb->type &= ~BLOCK_OPT; | |
3197 if(blb) blb->type &= ~BLOCK_OPT; | |
3198 if(brb) brb->type &= ~BLOCK_OPT; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3199 change ++; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3200 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3201 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3202 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3203 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3204 if(!change) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3205 break; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3206 } |
3051 | 3207 |
3208 if(s->block_max_depth == 1){ | |
3209 int change= 0; | |
3210 for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
3211 for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
3324 | 3212 int i; |
3051 | 3213 int best_rd, init_rd; |
3214 const int index= mb_x + mb_y * b_stride; | |
3215 BlockNode *b[4]; | |
3216 | |
3217 b[0]= &s->block[index]; | |
3218 b[1]= b[0]+1; | |
3219 b[2]= b[0]+b_stride; | |
3220 b[3]= b[2]+1; | |
3221 if(same_block(b[0], b[1]) && | |
3222 same_block(b[0], b[2]) && | |
3223 same_block(b[0], b[3])) | |
3224 continue; | |
3225 | |
3226 if(!s->me_cache_generation) | |
3227 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3228 s->me_cache_generation += 1<<22; | |
3229 | |
3230 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3231 | |
3314 | 3232 //FIXME more multiref search? |
3051 | 3233 check_4block_inter(s, mb_x, mb_y, |
3234 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
3314 | 3235 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); |
3051 | 3236 |
3237 for(i=0; i<4; i++) | |
3238 if(!(b[i]->type&BLOCK_INTRA)) | |
3314 | 3239 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); |
3051 | 3240 |
3241 if(init_rd != best_rd) | |
3242 change++; | |
3243 } | |
3244 } | |
3245 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
3246 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3247 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3248 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3249 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
2138 | 3250 const int w= b->width; |
3251 const int h= b->height; | |
4594 | 3252 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
5575 | 3253 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
2150 | 3254 int x,y, thres1, thres2; |
2138 | 3255 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3256 if(s->qlog == LOSSLESS_QLOG){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3257 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3258 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3259 dst[x + y*stride]= src[x + y*stride]; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3260 return; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3261 } |
2967 | 3262 |
2138 | 3263 bias= bias ? 0 : (3*qmul)>>3; |
2150 | 3264 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3265 thres2= 2*thres1; | |
2967 | 3266 |
2138 | 3267 if(!bias){ |
3268 for(y=0; y<h; y++){ | |
3269 for(x=0; x<w; x++){ | |
2150 | 3270 int i= src[x + y*stride]; |
2967 | 3271 |
2150 | 3272 if((unsigned)(i+thres1) > thres2){ |
3273 if(i>=0){ | |
3274 i<<= QEXPSHIFT; | |
3275 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3276 dst[x + y*stride]= i; |
2150 | 3277 }else{ |
3278 i= -i; | |
3279 i<<= QEXPSHIFT; | |
3280 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3281 dst[x + y*stride]= -i; |
2150 | 3282 } |
3283 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3284 dst[x + y*stride]= 0; |
2138 | 3285 } |
3286 } | |
3287 }else{ | |
3288 for(y=0; y<h; y++){ | |
3289 for(x=0; x<w; x++){ | |
2967 | 3290 int i= src[x + y*stride]; |
3291 | |
2150 | 3292 if((unsigned)(i+thres1) > thres2){ |
3293 if(i>=0){ | |
3294 i<<= QEXPSHIFT; | |
3295 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3296 dst[x + y*stride]= i; |
2150 | 3297 }else{ |
3298 i= -i; | |
3299 i<<= QEXPSHIFT; | |
3300 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3301 dst[x + y*stride]= -i; |
2150 | 3302 } |
3303 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3304 dst[x + y*stride]= 0; |
2138 | 3305 } |
3306 } | |
3307 } | |
3308 } | |
3309 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3310 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3311 const int w= b->width; |
4594 | 3312 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3313 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3314 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3315 int x,y; |
2967 | 3316 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3317 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3318 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3319 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3320 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3321 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3322 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3323 int i= line[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3324 if(i<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3325 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3326 }else if(i>0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3327 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3328 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3329 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3330 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3331 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3332 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3333 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
2138 | 3334 const int w= b->width; |
3335 const int h= b->height; | |
4594 | 3336 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3337 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2138 | 3338 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3339 int x,y; | |
2967 | 3340 |
2161 | 3341 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3342 |
2138 | 3343 for(y=0; y<h; y++){ |
3344 for(x=0; x<w; x++){ | |
3345 int i= src[x + y*stride]; | |
3346 if(i<0){ | |
3347 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
3348 }else if(i>0){ | |
3349 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
3350 } | |
3351 } | |
3352 } | |
3353 } | |
3354 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3355 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3356 const int w= b->width; |
3357 const int h= b->height; | |
3358 int x,y; | |
2967 | 3359 |
2138 | 3360 for(y=h-1; y>=0; y--){ |
3361 for(x=w-1; x>=0; x--){ | |
3362 int i= x + y*stride; | |
2967 | 3363 |
2138 | 3364 if(x){ |
3365 if(use_median){ | |
3366 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3367 else src[i] -= src[i - 1]; | |
3368 }else{ | |
3369 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3370 else src[i] -= src[i - 1]; | |
3371 } | |
3372 }else{ | |
3373 if(y) src[i] -= src[i - stride]; | |
3374 } | |
3375 } | |
3376 } | |
3377 } | |
3378 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3379 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3380 const int w= b->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3381 int x,y; |
2967 | 3382 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3383 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3384 IDWTELEM * prev; |
2967 | 3385 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3386 if (start_y != 0) |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3387 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2967 | 3388 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3389 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3390 prev = line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3391 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3392 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3393 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3394 if(x){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3395 if(use_median){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3396 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3397 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3398 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3399 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3400 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3401 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3402 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3403 if(y) line[x] += prev[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3404 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3405 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3406 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3407 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3408 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3409 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3410 const int w= b->width; |
3411 const int h= b->height; | |
3412 int x,y; | |
2967 | 3413 |
2138 | 3414 for(y=0; y<h; y++){ |
3415 for(x=0; x<w; x++){ | |
3416 int i= x + y*stride; | |
2967 | 3417 |
2138 | 3418 if(x){ |
3419 if(use_median){ | |
3420 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3421 else src[i] += src[i - 1]; | |
3422 }else{ | |
3423 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3424 else src[i] += src[i - 1]; | |
3425 } | |
3426 }else{ | |
3427 if(y) src[i] += src[i - stride]; | |
3428 } | |
3429 } | |
3430 } | |
3431 } | |
3432 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3433 static void encode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3434 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3435 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3436 for(plane_index=0; plane_index<2; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3437 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3438 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3439 if(orientation==2) continue; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3440 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3441 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3442 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3443 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3444 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3445 |
2138 | 3446 static void encode_header(SnowContext *s){ |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3447 int plane_index, i; |
2967 | 3448 uint8_t kstate[32]; |
3449 | |
3450 memset(kstate, MID_STATE, sizeof(kstate)); | |
2138 | 3451 |
2335 | 3452 put_rac(&s->c, kstate, s->keyframe); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3453 if(s->keyframe || s->always_reset){ |
2199 | 3454 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3455 s->last_spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3456 s->last_qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3457 s->last_qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3458 s->last_mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3459 s->last_block_max_depth= 0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3460 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3461 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3462 p->last_htaps=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3463 p->last_diag_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3464 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3465 } |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3466 } |
2138 | 3467 if(s->keyframe){ |
3468 put_symbol(&s->c, s->header_state, s->version, 0); | |
2335 | 3469 put_rac(&s->c, s->header_state, s->always_reset); |
2138 | 3470 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
3471 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
3472 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
3473 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
3474 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); | |
3475 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
2335 | 3476 put_rac(&s->c, s->header_state, s->spatial_scalability); |
3477 // put_rac(&s->c, s->header_state, s->rate_scalability); | |
3314 | 3478 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); |
2138 | 3479 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3480 encode_qlogs(s); |
2138 | 3481 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3482 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3483 if(!s->keyframe){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3484 int update_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3485 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3486 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3487 update_mc |= p->last_htaps != p->htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3488 update_mc |= p->last_diag_mc != p->diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3489 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3490 } |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3491 put_rac(&s->c, s->header_state, update_mc); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3492 if(update_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3493 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3494 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3495 put_rac(&s->c, s->header_state, p->diag_mc); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3496 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3497 for(i= p->htaps/2; i; i--) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3498 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3499 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3500 } |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3501 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3502 put_rac(&s->c, s->header_state, 1); |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3503 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3504 encode_qlogs(s); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3505 }else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3506 put_rac(&s->c, s->header_state, 0); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3507 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3508 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3509 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3510 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3511 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3512 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3513 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3514 |
6144 | 3515 } |
3516 | |
3517 static void update_last_header_values(SnowContext *s){ | |
3518 int plane_index; | |
3519 | |
3520 if(!s->keyframe){ | |
3521 for(plane_index=0; plane_index<2; plane_index++){ | |
3522 Plane *p= &s->plane[plane_index]; | |
3523 p->last_diag_mc= p->diag_mc; | |
3524 p->last_htaps = p->htaps; | |
3525 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); | |
3526 } | |
3527 } | |
3528 | |
6414 | 3529 s->last_spatial_decomposition_type = s->spatial_decomposition_type; |
3530 s->last_qlog = s->qlog; | |
3531 s->last_qbias = s->qbias; | |
3532 s->last_mv_scale = s->mv_scale; | |
3533 s->last_block_max_depth = s->block_max_depth; | |
3534 s->last_spatial_decomposition_count = s->spatial_decomposition_count; | |
2138 | 3535 } |
3536 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3537 static void decode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3538 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3539 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3540 for(plane_index=0; plane_index<3; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3541 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3542 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3543 int q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3544 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3545 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3546 else q= get_symbol(&s->c, s->header_state, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3547 s->plane[plane_index].band[level][orientation].qlog= q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3548 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3549 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3550 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3551 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3552 |
9367 | 3553 #define GET_S(dst, check) \ |
3554 tmp= get_symbol(&s->c, s->header_state, 0);\ | |
3555 if(!(check)){\ | |
3556 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\ | |
3557 return -1;\ | |
3558 }\ | |
3559 dst= tmp; | |
3560 | |
2138 | 3561 static int decode_header(SnowContext *s){ |
9365 | 3562 int plane_index, tmp; |
2335 | 3563 uint8_t kstate[32]; |
2138 | 3564 |
2967 | 3565 memset(kstate, MID_STATE, sizeof(kstate)); |
2335 | 3566 |
3567 s->keyframe= get_rac(&s->c, kstate); | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3568 if(s->keyframe || s->always_reset){ |
2199 | 3569 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3570 s->spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3571 s->qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3572 s->qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3573 s->mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3574 s->block_max_depth= 0; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3575 } |
2138 | 3576 if(s->keyframe){ |
9367 | 3577 GET_S(s->version, tmp <= 0U) |
2335 | 3578 s->always_reset= get_rac(&s->c, s->header_state); |
2138 | 3579 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
3580 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
9371 | 3581 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) |
2138 | 3582 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); |
3583 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
3584 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
2335 | 3585 s->spatial_scalability= get_rac(&s->c, s->header_state); |
3586 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
9367 | 3587 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES) |
3588 s->max_ref_frames++; | |
2138 | 3589 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3590 decode_qlogs(s); |
2138 | 3591 } |
2967 | 3592 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3593 if(!s->keyframe){ |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3594 if(get_rac(&s->c, s->header_state)){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3595 for(plane_index=0; plane_index<2; plane_index++){ |
5807 | 3596 int htaps, i, sum=0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3597 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3598 p->diag_mc= get_rac(&s->c, s->header_state); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3599 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
3600 if((unsigned)htaps > HTAPS_MAX || htaps==0) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3601 return -1; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3602 p->htaps= htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3603 for(i= htaps/2; i; i--){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3604 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3605 sum += p->hcoeff[i]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3606 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3607 p->hcoeff[0]= 32-sum; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3608 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3609 s->plane[2].diag_mc= s->plane[1].diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3610 s->plane[2].htaps = s->plane[1].htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3611 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3612 } |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3613 if(get_rac(&s->c, s->header_state)){ |
9371 | 3614 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3615 decode_qlogs(s); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3616 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3617 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3618 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3619 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); |
9366 | 3620 if(s->spatial_decomposition_type > 1U){ |
2138 | 3621 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); |
3622 return -1; | |
3623 } | |
9373
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3624 if(FFMIN(s->avctx-> width>>s->chroma_h_shift, |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3625 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){ |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3626 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count); |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3627 return -1; |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3628 } |
2967 | 3629 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3630 s->qlog += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3631 s->mv_scale += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3632 s->qbias += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3633 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); |
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3206
diff
changeset
|
3634 if(s->block_max_depth > 1 || s->block_max_depth < 0){ |
2952 | 3635 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); |
3636 s->block_max_depth= 0; | |
3637 return -1; | |
3638 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3639 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3640 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3641 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3642 |
3075 | 3643 static void init_qexp(void){ |
2600 | 3644 int i; |
3645 double v=128; | |
3646 | |
3647 for(i=0; i<QROOT; i++){ | |
3648 qexp[i]= lrintf(v); | |
2967 | 3649 v *= pow(2, 1.0 / QROOT); |
2600 | 3650 } |
3651 } | |
3652 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
3653 static av_cold int common_init(AVCodecContext *avctx){ |
2138 | 3654 SnowContext *s = avctx->priv_data; |
3655 int width, height; | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3656 int i, j; |
2138 | 3657 |
3658 s->avctx= avctx; | |
9365 | 3659 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe |
2967 | 3660 |
2138 | 3661 dsputil_init(&s->dsp, avctx); |
3662 | |
3663 #define mcf(dx,dy)\ | |
3664 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3665 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3666 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3667 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3668 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3669 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
2138 | 3670 |
3671 mcf( 0, 0) | |
3672 mcf( 4, 0) | |
3673 mcf( 8, 0) | |
3674 mcf(12, 0) | |
3675 mcf( 0, 4) | |
3676 mcf( 4, 4) | |
3677 mcf( 8, 4) | |
3678 mcf(12, 4) | |
3679 mcf( 0, 8) | |
3680 mcf( 4, 8) | |
3681 mcf( 8, 8) | |
3682 mcf(12, 8) | |
3683 mcf( 0,12) | |
3684 mcf( 4,12) | |
3685 mcf( 8,12) | |
3686 mcf(12,12) | |
3687 | |
3688 #define mcfh(dx,dy)\ | |
3689 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3690 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3691 mc_block_hpel ## dx ## dy ## 16;\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3692 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3693 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3694 mc_block_hpel ## dx ## dy ## 8; |
2138 | 3695 |
3696 mcfh(0, 0) | |
3697 mcfh(8, 0) | |
3698 mcfh(0, 8) | |
3699 mcfh(8, 8) | |
2600 | 3700 |
3701 if(!qexp[0]) | |
3702 init_qexp(); | |
3703 | |
2138 | 3704 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
2967 | 3705 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3706 width= s->avctx->width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3707 height= s->avctx->height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3708 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3709 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
6412 | 3710 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here |
2967 | 3711 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3712 for(i=0; i<MAX_REF_FRAMES; i++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3713 for(j=0; j<MAX_REF_FRAMES; j++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3714 scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3715 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3716 s->avctx->get_buffer(s->avctx, &s->mconly_picture); |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
3717 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE); |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3718 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3719 return 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3720 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3721 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3722 static int common_init_after_header(AVCodecContext *avctx){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3723 SnowContext *s = avctx->priv_data; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3724 int plane_index, level, orientation; |
2967 | 3725 |
3726 for(plane_index=0; plane_index<3; plane_index++){ | |
2138 | 3727 int w= s->avctx->width; |
3728 int h= s->avctx->height; | |
3729 | |
3730 if(plane_index){ | |
3731 w>>= s->chroma_h_shift; | |
3732 h>>= s->chroma_v_shift; | |
3733 } | |
3734 s->plane[plane_index].width = w; | |
3735 s->plane[plane_index].height= h; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3736 |
2138 | 3737 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
3738 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3739 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 3740 |
2138 | 3741 b->buf= s->spatial_dwt_buffer; |
3742 b->level= level; | |
3743 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3744 b->width = (w + !(orientation&1))>>1; | |
3745 b->height= (h + !(orientation>1))>>1; | |
2967 | 3746 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3747 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3748 b->buf_x_offset = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3749 b->buf_y_offset = 0; |
2967 | 3750 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3751 if(orientation&1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3752 b->buf += (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3753 b->buf_x_offset = (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3754 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3755 if(orientation>1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3756 b->buf += b->stride>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3757 b->buf_y_offset = b->stride_line >> 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3758 } |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3759 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
2967 | 3760 |
2138 | 3761 if(level) |
3762 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3763 //FIXME avoid this realloc |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3764 av_freep(&b->x_coeff); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3765 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
2138 | 3766 } |
3767 w= (w+1)>>1; | |
3768 h= (h+1)>>1; | |
3769 } | |
3770 } | |
2967 | 3771 |
2138 | 3772 return 0; |
3773 } | |
3774 | |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3775 static int qscale2qlog(int qscale){ |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3776 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3777 + 61*QROOT/8; //<64 >60 |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3778 } |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3779 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3780 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) |
3313 | 3781 { |
6412 | 3782 /* Estimate the frame's complexity as a sum of weighted dwt coefficients. |
3313 | 3783 * FIXME we know exact mv bits at this point, |
3784 * but ratecontrol isn't set up to include them. */ | |
3785 uint32_t coef_sum= 0; | |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3786 int level, orientation, delta_qlog; |
3313 | 3787 |
3788 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3789 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3790 SubBand *b= &s->plane[0].band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3791 IDWTELEM *buf= b->ibuf; |
3313 | 3792 const int w= b->width; |
3793 const int h= b->height; | |
3794 const int stride= b->stride; | |
4594 | 3795 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
3313 | 3796 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3797 const int qdiv= (1<<16)/qmul; | |
3798 int x, y; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3799 //FIXME this is ugly |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3800 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3801 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3802 buf[x+y*stride]= b->buf[x+y*stride]; |
3313 | 3803 if(orientation==0) |
3804 decorrelate(s, b, buf, stride, 1, 0); | |
3805 for(y=0; y<h; y++) | |
3806 for(x=0; x<w; x++) | |
3807 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | |
3808 } | |
3809 } | |
3810 | |
3811 /* ugly, ratecontrol just takes a sqrt again */ | |
3812 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | |
3813 assert(coef_sum < INT_MAX); | |
3814 | |
6481 | 3815 if(pict->pict_type == FF_I_TYPE){ |
3313 | 3816 s->m.current_picture.mb_var_sum= coef_sum; |
3817 s->m.current_picture.mc_mb_var_sum= 0; | |
3818 }else{ | |
3819 s->m.current_picture.mc_mb_var_sum= coef_sum; | |
3820 s->m.current_picture.mb_var_sum= 0; | |
3821 } | |
3822 | |
3823 pict->quality= ff_rate_estimate_qscale(&s->m, 1); | |
3766 | 3824 if (pict->quality < 0) |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
3825 return INT_MIN; |
3313 | 3826 s->lambda= pict->quality * 3/2; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3827 delta_qlog= qscale2qlog(pict->quality) - s->qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3828 s->qlog+= delta_qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3829 return delta_qlog; |
3313 | 3830 } |
2138 | 3831 |
5909 | 3832 static void calculate_visual_weight(SnowContext *s, Plane *p){ |
2138 | 3833 int width = p->width; |
3834 int height= p->height; | |
2198 | 3835 int level, orientation, x, y; |
2138 | 3836 |
3837 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3838 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3839 SubBand *b= &p->band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3840 IDWTELEM *ibuf= b->ibuf; |
2138 | 3841 int64_t error=0; |
2967 | 3842 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3843 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3844 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3845 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2138 | 3846 for(y=0; y<height; y++){ |
3847 for(x=0; x<width; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3848 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
2138 | 3849 error += d*d; |
3850 } | |
3851 } | |
3852 | |
3853 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
3854 } | |
3855 } | |
3856 } | |
3857 | |
5702 | 3858 #define QUANTIZE2 0 |
3859 | |
3860 #if QUANTIZE2==1 | |
3861 #define Q2_STEP 8 | |
3862 | |
3863 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
3864 SubBand *b= &p->band[level][orientation]; | |
3865 int x, y; | |
3866 int xo=0; | |
3867 int yo=0; | |
3868 int step= 1 << (s->spatial_decomposition_count - level); | |
3869 | |
3870 if(orientation&1) | |
3871 xo= step>>1; | |
3872 if(orientation&2) | |
3873 yo= step>>1; | |
3874 | |
6412 | 3875 //FIXME bias for nonzero ? |
5702 | 3876 //FIXME optimize |
3877 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
3878 for(y=0; y<p->height; y++){ | |
3879 for(x=0; x<p->width; x++){ | |
3880 int sx= (x-xo + step/2) / step / Q2_STEP; | |
3881 int sy= (y-yo + step/2) / step / Q2_STEP; | |
3882 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
3883 assert(sx>=0 && sy>=0 && sx < score_stride); | |
3884 v= ((v+8)>>4)<<4; | |
3885 score[sx + sy*score_stride] += v*v; | |
3886 assert(score[sx + sy*score_stride] >= 0); | |
3887 } | |
3888 } | |
3889 } | |
3890 | |
3891 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
3892 int level, orientation; | |
3893 | |
3894 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3895 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3896 SubBand *b= &p->band[level][orientation]; | |
3897 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3898 | |
3899 dequantize(s, b, dst, b->stride); | |
3900 } | |
3901 } | |
3902 } | |
3903 | |
3904 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
3905 int level, orientation, ys, xs, x, y, pass; | |
3906 IDWTELEM best_dequant[height * stride]; | |
3907 IDWTELEM idwt2_buffer[height * stride]; | |
3908 const int score_stride= (width + 10)/Q2_STEP; | |
3909 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3910 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3911 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
3912 | |
3913 //FIXME pass the copy cleanly ? | |
3914 | |
3915 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
3916 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3917 | |
3918 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3919 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3920 SubBand *b= &p->band[level][orientation]; | |
3921 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3922 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
6412 | 3923 assert(src == b->buf); // code does not depend on this but it is true currently |
5702 | 3924 |
3925 quantize(s, b, dst, src, b->stride, s->qbias); | |
3926 } | |
3927 } | |
3928 for(pass=0; pass<1; pass++){ | |
3929 if(s->qbias == 0) //keyframe | |
3930 continue; | |
3931 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3932 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3933 SubBand *b= &p->band[level][orientation]; | |
3934 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3935 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3936 | |
3937 for(ys= 0; ys<Q2_STEP; ys++){ | |
3938 for(xs= 0; xs<Q2_STEP; xs++){ | |
3939 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3940 dequantize_all(s, p, idwt2_buffer, width, height); | |
3941 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3942 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3943 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3944 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3945 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3946 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
3947 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
6412 | 3948 //FIXME try more than just -- |
5702 | 3949 } |
3950 } | |
3951 dequantize_all(s, p, idwt2_buffer, width, height); | |
3952 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3953 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3954 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3955 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3956 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
3957 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
3958 best_score[score_idx]= score[score_idx]; | |
3959 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
3960 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
3961 //FIXME copy instead | |
3962 } | |
3963 } | |
3964 } | |
3965 } | |
3966 } | |
3967 } | |
3968 } | |
3969 } | |
6412 | 3970 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end |
5702 | 3971 } |
3972 | |
5910 | 3973 #endif /* QUANTIZE2==1 */ |
5702 | 3974 |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
3975 static av_cold int encode_init(AVCodecContext *avctx) |
2138 | 3976 { |
3977 SnowContext *s = avctx->priv_data; | |
2198 | 3978 int plane_index; |
2138 | 3979 |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
3980 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ |
6412 | 3981 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" |
3982 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); | |
2151 | 3983 return -1; |
3984 } | |
2967 | 3985 |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3986 if(avctx->prediction_method == DWT_97 |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3987 && (avctx->flags & CODEC_FLAG_QSCALE) |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3988 && avctx->global_quality == 0){ |
6412 | 3989 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3990 return -1; |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3991 } |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3992 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3993 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3994 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3995 s->chroma_h_shift= 1; //FIXME XXX |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3996 s->chroma_v_shift= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3997 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3998 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3999 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4000 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4001 for(plane_index=0; plane_index<3; plane_index++){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4002 s->plane[plane_index].diag_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4003 s->plane[plane_index].htaps= 6; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4004 s->plane[plane_index].hcoeff[0]= 40; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4005 s->plane[plane_index].hcoeff[1]= -10; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4006 s->plane[plane_index].hcoeff[2]= 2; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4007 s->plane[plane_index].fast_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4008 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4009 |
2138 | 4010 common_init(avctx); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4011 alloc_blocks(s); |
2967 | 4012 |
2138 | 4013 s->version=0; |
2967 | 4014 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4015 s->m.avctx = avctx; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4016 s->m.flags = avctx->flags; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4017 s->m.bit_rate= avctx->bit_rate; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4018 |
8239
dcfdb3352dde
Fix regression test failure with pthreads on multiprocessor systems.
michael
parents:
8214
diff
changeset
|
4019 s->m.me.temp = |
2138 | 4020 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
4021 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
4022 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4023 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); |
2138 | 4024 h263_encode_init(&s->m); //mv_penalty |
4025 | |
3314 | 4026 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); |
4027 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4028 if(avctx->flags&CODEC_FLAG_PASS1){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4029 if(!avctx->stats_out) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4030 avctx->stats_out = av_mallocz(256); |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4031 } |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4032 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4033 if(ff_rate_control_init(&s->m) < 0) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4034 return -1; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4035 } |
3313 | 4036 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4037 |
2138 | 4038 avctx->coded_frame= &s->current_picture; |
4039 switch(avctx->pix_fmt){ | |
4040 // case PIX_FMT_YUV444P: | |
4041 // case PIX_FMT_YUV422P: | |
4042 case PIX_FMT_YUV420P: | |
4043 case PIX_FMT_GRAY8: | |
4044 // case PIX_FMT_YUV411P: | |
4045 // case PIX_FMT_YUV410P: | |
4046 s->colorspace_type= 0; | |
4047 break; | |
4494
ce643a22f049
Replace deprecated PIX_FMT names by the newer variants.
diego
parents:
4436
diff
changeset
|
4048 /* case PIX_FMT_RGB32: |
2138 | 4049 s->colorspace= 1; |
4050 break;*/ | |
4051 default: | |
5908 | 4052 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); |
2138 | 4053 return -1; |
4054 } | |
4055 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
4056 s->chroma_h_shift= 1; | |
4057 s->chroma_v_shift= 1; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4058 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4059 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4060 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4061 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4062 s->avctx->get_buffer(s->avctx, &s->input_picture); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4063 |
3314 | 4064 if(s->avctx->me_method == ME_ITER){ |
4065 int i; | |
4066 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
4067 for(i=0; i<s->max_ref_frames; i++){ | |
4068 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
4069 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
4070 } | |
4071 } | |
4072 | |
2138 | 4073 return 0; |
4074 } | |
4075 | |
5924 | 4076 #define USE_HALFPEL_PLANE 0 |
4077 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4078 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4079 int p,x,y; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4080 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4081 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4082 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4083 for(p=0; p<3; p++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4084 int is_chroma= !!p; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4085 int w= s->avctx->width >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4086 int h= s->avctx->height >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4087 int ls= frame->linesize[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4088 uint8_t *src= frame->data[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4089 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4090 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4091 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4092 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4093 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4094 halfpel[0][p]= src; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4095 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4096 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4097 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4098 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4099 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4100 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4101 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4102 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4103 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4104 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4105 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4106 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4107 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4108 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4109 src= halfpel[1][p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4110 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4111 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4112 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4113 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4114 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4115 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4116 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4117 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4118 //FIXME border! |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4119 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4120 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4121 |
9375
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4122 static void release_buffer(AVCodecContext *avctx){ |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4123 SnowContext *s = avctx->priv_data; |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4124 int i; |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4125 |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4126 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4127 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4128 for(i=0; i<9; i++) |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4129 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4130 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4131 } |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4132 } |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4133 |
2138 | 4134 static int frame_start(SnowContext *s){ |
4135 AVFrame tmp; | |
2187 | 4136 int w= s->avctx->width; //FIXME round up to x16 ? |
4137 int h= s->avctx->height; | |
2138 | 4138 |
2187 | 4139 if(s->current_picture.data[0]){ |
6437 | 4140 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); |
4141 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
4142 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
2187 | 4143 } |
4144 | |
9376
e376facae584
Make sure the next used frame is released so get_buffer() wont fail.
michael
parents:
9375
diff
changeset
|
4145 release_buffer(s->avctx); |
e376facae584
Make sure the next used frame is released so get_buffer() wont fail.
michael
parents:
9375
diff
changeset
|
4146 |
3314 | 4147 tmp= s->last_picture[s->max_ref_frames-1]; |
4148 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4149 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); |
5924 | 4150 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4151 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); |
3314 | 4152 s->last_picture[0]= s->current_picture; |
2138 | 4153 s->current_picture= tmp; |
2967 | 4154 |
3314 | 4155 if(s->keyframe){ |
4156 s->ref_frames= 0; | |
4157 }else{ | |
4158 int i; | |
4159 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
4160 if(i && s->last_picture[i-1].key_frame) | |
4161 break; | |
4162 s->ref_frames= i; | |
9372
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4163 if(s->ref_frames==0){ |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4164 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n"); |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4165 return -1; |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4166 } |
3314 | 4167 } |
4168 | |
2138 | 4169 s->current_picture.reference= 1; |
4170 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
4171 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
4172 return -1; | |
4173 } | |
2967 | 4174 |
3314 | 4175 s->current_picture.key_frame= s->keyframe; |
4176 | |
2138 | 4177 return 0; |
4178 } | |
4179 | |
4180 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
4181 SnowContext *s = avctx->priv_data; | |
2335 | 4182 RangeCoder * const c= &s->c; |
2138 | 4183 AVFrame *pict = data; |
4184 const int width= s->avctx->width; | |
4185 const int height= s->avctx->height; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4186 int level, orientation, plane_index, i, y; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4187 uint8_t rc_header_bak[sizeof(s->header_state)]; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4188 uint8_t rc_block_bak[sizeof(s->block_state)]; |
2138 | 4189 |
2335 | 4190 ff_init_range_encoder(c, buf, buf_size); |
4191 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2967 | 4192 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4193 for(i=0; i<3; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4194 int shift= !!i; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4195 for(y=0; y<(height>>shift); y++) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4196 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4197 &pict->data[i][y * pict->linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4198 width>>shift); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4199 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4200 s->new_picture = *pict; |
2138 | 4201 |
3313 | 4202 s->m.picture_number= avctx->frame_number; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4203 if(avctx->flags&CODEC_FLAG_PASS2){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4204 s->m.pict_type = |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4205 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4206 s->keyframe= pict->pict_type==FF_I_TYPE; |
3766 | 4207 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { |
3193 | 4208 pict->quality= ff_rate_estimate_qscale(&s->m, 0); |
3766 | 4209 if (pict->quality < 0) |
4210 return -1; | |
4211 } | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4212 }else{ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4213 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
3313 | 4214 s->m.pict_type= |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4215 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4216 } |
2967 | 4217 |
3313 | 4218 if(s->pass1_rc && avctx->frame_number == 0) |
4219 pict->quality= 2*FF_QP2LAMBDA; | |
2161 | 4220 if(pict->quality){ |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4221 s->qlog= qscale2qlog(pict->quality); |
3313 | 4222 s->lambda = pict->quality * 3/2; |
4223 } | |
4224 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ | |
2161 | 4225 s->qlog= LOSSLESS_QLOG; |
3313 | 4226 s->lambda = 0; |
6412 | 4227 }//else keep previous frame's qlog until after motion estimation |
2138 | 4228 |
4229 frame_start(s); | |
4230 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4231 s->m.current_picture_ptr= &s->m.current_picture; |
6481 | 4232 if(pict->pict_type == FF_P_TYPE){ |
2138 | 4233 int block_width = (width +15)>>4; |
4234 int block_height= (height+15)>>4; | |
4235 int stride= s->current_picture.linesize[0]; | |
2967 | 4236 |
2138 | 4237 assert(s->current_picture.data[0]); |
3314 | 4238 assert(s->last_picture[0].data[0]); |
2967 | 4239 |
2138 | 4240 s->m.avctx= s->avctx; |
4241 s->m.current_picture.data[0]= s->current_picture.data[0]; | |
3314 | 4242 s->m. last_picture.data[0]= s->last_picture[0].data[0]; |
2138 | 4243 s->m. new_picture.data[0]= s-> input_picture.data[0]; |
4244 s->m. last_picture_ptr= &s->m. last_picture; | |
4245 s->m.linesize= | |
4246 s->m. last_picture.linesize[0]= | |
4247 s->m. new_picture.linesize[0]= | |
4248 s->m.current_picture.linesize[0]= stride; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4249 s->m.uvlinesize= s->current_picture.linesize[1]; |
2138 | 4250 s->m.width = width; |
4251 s->m.height= height; | |
4252 s->m.mb_width = block_width; | |
4253 s->m.mb_height= block_height; | |
4254 s->m.mb_stride= s->m.mb_width+1; | |
4255 s->m.b8_stride= 2*s->m.mb_width+1; | |
4256 s->m.f_code=1; | |
4257 s->m.pict_type= pict->pict_type; | |
4258 s->m.me_method= s->avctx->me_method; | |
4259 s->m.me.scene_change_score=0; | |
4260 s->m.flags= s->avctx->flags; | |
4261 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
4262 s->m.out_format= FMT_H263; | |
4263 s->m.unrestricted_mv= 1; | |
4264 | |
3313 | 4265 s->m.lambda = s->lambda; |
2138 | 4266 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4267 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4268 |
2138 | 4269 s->m.dsp= s->dsp; //move |
4270 ff_init_me(&s->m); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4271 s->dsp= s->m.dsp; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4272 } |
2967 | 4273 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4274 if(s->pass1_rc){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4275 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4276 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4277 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4278 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4279 redo_frame: |
2967 | 4280 |
6481 | 4281 if(pict->pict_type == FF_I_TYPE) |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4282 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4283 else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4284 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4285 |
3313 | 4286 s->m.pict_type = pict->pict_type; |
6481 | 4287 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0; |
2138 | 4288 |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4289 common_init_after_header(avctx); |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4290 |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4291 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4292 for(plane_index=0; plane_index<3; plane_index++){ |
5909 | 4293 calculate_visual_weight(s, &s->plane[plane_index]); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4294 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4295 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4296 |
2138 | 4297 encode_header(s); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4298 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4299 encode_blocks(s, 1); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4300 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; |
2967 | 4301 |
2138 | 4302 for(plane_index=0; plane_index<3; plane_index++){ |
4303 Plane *p= &s->plane[plane_index]; | |
4304 int w= p->width; | |
4305 int h= p->height; | |
4306 int x, y; | |
2198 | 4307 // int bits= put_bits_count(&s->c.pb); |
2138 | 4308 |
6414 | 4309 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
4310 //FIXME optimize | |
4311 if(pict->data[plane_index]) //FIXME gray hack | |
4312 for(y=0; y<h; y++){ | |
4313 for(x=0; x<w; x++){ | |
4314 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; | |
4315 } | |
4316 } | |
4317 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); | |
4318 | |
4319 if( plane_index==0 | |
6481 | 4320 && pict->pict_type == FF_P_TYPE |
6414 | 4321 && !(avctx->flags&CODEC_FLAG_PASS2) |
4322 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ | |
4323 ff_init_range_encoder(c, buf, buf_size); | |
4324 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
4325 pict->pict_type= FF_I_TYPE; | |
4326 s->keyframe=1; | |
4327 s->current_picture.key_frame=1; | |
4328 goto redo_frame; | |
2138 | 4329 } |
6414 | 4330 |
4331 if(s->qlog == LOSSLESS_QLOG){ | |
4332 for(y=0; y<h; y++){ | |
4333 for(x=0; x<w; x++){ | |
4334 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; | |
4335 } | |
2161 | 4336 } |
6414 | 4337 }else{ |
4338 for(y=0; y<h; y++){ | |
4339 for(x=0; x<w; x++){ | |
4340 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; | |
4341 } | |
5575 | 4342 } |
4343 } | |
6414 | 4344 |
4345 /* if(QUANTIZE2) | |
4346 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); | |
4347 else*/ | |
4348 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4349 | |
4350 if(s->pass1_rc && plane_index==0){ | |
4351 int delta_qlog = ratecontrol_1pass(s, pict); | |
4352 if (delta_qlog <= INT_MIN) | |
4353 return -1; | |
4354 if(delta_qlog){ | |
4355 //reordering qlog in the bitstream would eliminate this reset | |
4356 ff_init_range_encoder(c, buf, buf_size); | |
4357 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); | |
4358 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); | |
4359 encode_header(s); | |
4360 encode_blocks(s, 0); | |
2161 | 4361 } |
4362 } | |
6414 | 4363 |
4364 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4365 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4366 SubBand *b= &p->band[level][orientation]; | |
4367 | |
4368 if(!QUANTIZE2) | |
4369 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); | |
4370 if(orientation==0) | |
6481 | 4371 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0); |
6414 | 4372 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); |
4373 assert(b->parent==NULL || b->parent->stride == b->stride*2); | |
4374 if(orientation==0) | |
4375 correlate(s, b, b->ibuf, b->stride, 1, 0); | |
4376 } | |
4377 } | |
4378 | |
4379 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4380 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4381 SubBand *b= &p->band[level][orientation]; | |
4382 | |
4383 dequantize(s, b, b->ibuf, b->stride); | |
4384 } | |
4385 } | |
4386 | |
4387 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4388 if(s->qlog == LOSSLESS_QLOG){ | |
4389 for(y=0; y<h; y++){ | |
4390 for(x=0; x<w; x++){ | |
4391 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; | |
4392 } | |
4393 } | |
4394 } | |
4395 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4396 }else{ | |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4397 //ME/MC only |
6481 | 4398 if(pict->pict_type == FF_I_TYPE){ |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4399 for(y=0; y<h; y++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4400 for(x=0; x<w; x++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4401 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4402 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4403 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4404 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4405 }else{ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4406 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4407 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4408 } |
6414 | 4409 } |
2138 | 4410 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
4411 int64_t error= 0; | |
2967 | 4412 |
6414 | 4413 if(pict->data[plane_index]) //FIXME gray hack |
4414 for(y=0; y<h; y++){ | |
4415 for(x=0; x<w; x++){ | |
4416 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; | |
4417 error += d*d; | |
4418 } | |
2138 | 4419 } |
4420 s->avctx->error[plane_index] += error; | |
2232 | 4421 s->current_picture.error[plane_index] = error; |
2138 | 4422 } |
6414 | 4423 |
2138 | 4424 } |
4425 | |
6144 | 4426 update_last_header_values(s); |
4427 | |
9374 | 4428 release_buffer(avctx); |
2138 | 4429 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4430 s->current_picture.coded_picture_number = avctx->frame_number; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4431 s->current_picture.pict_type = pict->pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4432 s->current_picture.quality = pict->quality; |
3313 | 4433 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
4434 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; | |
4435 s->m.current_picture.display_picture_number = | |
4436 s->m.current_picture.coded_picture_number = avctx->frame_number; | |
4437 s->m.current_picture.quality = pict->quality; | |
4438 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); | |
4439 if(s->pass1_rc) | |
3766 | 4440 if (ff_rate_estimate_qscale(&s->m, 0) < 0) |
4441 return -1; | |
3313 | 4442 if(avctx->flags&CODEC_FLAG_PASS1) |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4443 ff_write_pass1_stats(&s->m); |
3313 | 4444 s->m.last_pict_type = s->m.pict_type; |
4123
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4445 avctx->frame_bits = s->m.frame_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4446 avctx->mv_bits = s->m.mv_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4447 avctx->misc_bits = s->m.misc_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4448 avctx->p_tex_bits = s->m.p_tex_bits; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4449 |
2138 | 4450 emms_c(); |
2967 | 4451 |
2335 | 4452 return ff_rac_terminate(c); |
2138 | 4453 } |
4454 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4455 static av_cold void common_end(SnowContext *s){ |
3314 | 4456 int plane_index, level, orientation, i; |
2192 | 4457 |
2138 | 4458 av_freep(&s->spatial_dwt_buffer); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4459 av_freep(&s->spatial_idwt_buffer); |
2138 | 4460 |
8239
dcfdb3352dde
Fix regression test failure with pthreads on multiprocessor systems.
michael
parents:
8214
diff
changeset
|
4461 s->m.me.temp= NULL; |
2967 | 4462 av_freep(&s->m.me.scratchpad); |
2138 | 4463 av_freep(&s->m.me.map); |
4464 av_freep(&s->m.me.score_map); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4465 av_freep(&s->m.obmc_scratchpad); |
2967 | 4466 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4467 av_freep(&s->block); |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
4468 av_freep(&s->scratchbuf); |
2192 | 4469 |
3314 | 4470 for(i=0; i<MAX_REF_FRAMES; i++){ |
4471 av_freep(&s->ref_mvs[i]); | |
4472 av_freep(&s->ref_scores[i]); | |
4473 if(s->last_picture[i].data[0]) | |
4474 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
4475 } | |
4476 | |
2967 | 4477 for(plane_index=0; plane_index<3; plane_index++){ |
2192 | 4478 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
4479 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4480 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 4481 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4482 av_freep(&b->x_coeff); |
2192 | 4483 } |
4484 } | |
4485 } | |
2138 | 4486 } |
4487 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4488 static av_cold int encode_end(AVCodecContext *avctx) |
2138 | 4489 { |
4490 SnowContext *s = avctx->priv_data; | |
4491 | |
4492 common_end(s); | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4493 av_free(avctx->stats_out); |
2138 | 4494 |
4495 return 0; | |
4496 } | |
4497 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4498 static av_cold int decode_init(AVCodecContext *avctx) |
2138 | 4499 { |
2635 | 4500 avctx->pix_fmt= PIX_FMT_YUV420P; |
2138 | 4501 |
4502 common_init(avctx); | |
2967 | 4503 |
2138 | 4504 return 0; |
4505 } | |
4506 | |
9355
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4507 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ |
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4508 const uint8_t *buf = avpkt->data; |
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4509 int buf_size = avpkt->size; |
2138 | 4510 SnowContext *s = avctx->priv_data; |
2335 | 4511 RangeCoder * const c= &s->c; |
2138 | 4512 int bytes_read; |
4513 AVFrame *picture = data; | |
9374 | 4514 int level, orientation, plane_index; |
2138 | 4515 |
2335 | 4516 ff_init_range_decoder(c, buf, buf_size); |
4517 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2138 | 4518 |
4519 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
5668 | 4520 if(decode_header(s)<0) |
4521 return -1; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4522 common_init_after_header(avctx); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4523 |
5662
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4524 // realloc slice buffer for the case that spatial_decomposition_count changed |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4525 slice_buffer_destroy(&s->sb); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4526 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4527 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4528 for(plane_index=0; plane_index<3; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4529 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4530 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4531 && p->hcoeff[1]==-10 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4532 && p->hcoeff[2]==2; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4533 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4534 |
9370 | 4535 alloc_blocks(s); |
2138 | 4536 |
9372
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4537 if(frame_start(s) < 0) |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4538 return -1; |
6412 | 4539 //keyframe flag duplication mess FIXME |
2138 | 4540 if(avctx->debug&FF_DEBUG_PICT_INFO) |
4541 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
2967 | 4542 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4543 decode_blocks(s); |
2138 | 4544 |
4545 for(plane_index=0; plane_index<3; plane_index++){ | |
4546 Plane *p= &s->plane[plane_index]; | |
4547 int w= p->width; | |
4548 int h= p->height; | |
4549 int x, y; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4550 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
2967 | 4551 |
6414 | 4552 if(s->avctx->debug&2048){ |
4553 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
4554 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4555 | |
4556 for(y=0; y<h; y++){ | |
4557 for(x=0; x<w; x++){ | |
4558 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | |
4559 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | |
4560 } | |
2138 | 4561 } |
4562 } | |
6414 | 4563 |
4564 { | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4565 for(level=0; level<s->spatial_decomposition_count; level++){ |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4566 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4567 SubBand *b= &p->band[level][orientation]; |
6414 | 4568 unpack_coeffs(s, b, b->parent, orientation); |
4569 } | |
4570 } | |
4571 } | |
4572 | |
4573 { | |
4574 const int mb_h= s->b_height << s->block_max_depth; | |
4575 const int block_size = MB_SIZE >> s->block_max_depth; | |
4576 const int block_w = plane_index ? block_size/2 : block_size; | |
4577 int mb_y; | |
8308 | 4578 DWTCompose cs[MAX_DECOMPOSITIONS]; |
6414 | 4579 int yd=0, yq=0; |
4580 int y; | |
4581 int end_y; | |
4582 | |
4583 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4584 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
4585 | |
4586 int slice_starty = block_w*mb_y; | |
4587 int slice_h = block_w*(mb_y+1); | |
4588 if (!(s->keyframe || s->avctx->debug&512)){ | |
4589 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | |
4590 slice_h -= (block_w >> 1); | |
4591 } | |
4592 | |
4593 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4594 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4595 SubBand *b= &p->band[level][orientation]; | |
4596 int start_y; | |
4597 int end_y; | |
4598 int our_mb_start = mb_y; | |
4599 int our_mb_end = (mb_y + 1); | |
4600 const int extra= 3; | |
4601 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
4602 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
4603 if (!(s->keyframe || s->avctx->debug&512)){ | |
4604 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
4605 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4606 } |
6414 | 4607 start_y = FFMIN(b->height, start_y); |
4608 end_y = FFMIN(b->height, end_y); | |
4609 | |
4610 if (start_y != end_y){ | |
4611 if (orientation == 0){ | |
4612 SubBand * correlate_band = &p->band[0][0]; | |
4613 int correlate_end_y = FFMIN(b->height, end_y + 1); | |
4614 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | |
4615 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | |
4616 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | |
4617 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); | |
4618 } | |
4619 else | |
4620 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
4621 } | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4622 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4623 } |
6414 | 4624 |
4625 for(; yd<slice_h; yd+=4){ | |
4626 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
4627 } | |
4628 | |
4629 if(s->qlog == LOSSLESS_QLOG){ | |
4630 for(; yq<slice_h && yq<h; yq++){ | |
4631 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
4632 for(x=0; x<w; x++){ | |
4633 line[x] <<= FRAC_BITS; | |
4634 } | |
2161 | 4635 } |
4636 } | |
6414 | 4637 |
4638 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); | |
4639 | |
4640 y = FFMIN(p->height, slice_starty); | |
4641 end_y = FFMIN(p->height, slice_h); | |
4642 while(y < end_y) | |
4643 slice_buffer_release(&s->sb, y++); | |
2161 | 4644 } |
2562 | 4645 |
6414 | 4646 slice_buffer_flush(&s->sb); |
4647 } | |
4648 | |
2138 | 4649 } |
2967 | 4650 |
2138 | 4651 emms_c(); |
4652 | |
9374 | 4653 release_buffer(avctx); |
2138 | 4654 |
6414 | 4655 if(!(s->avctx->debug&2048)) |
4656 *picture= s->current_picture; | |
4657 else | |
4658 *picture= s->mconly_picture; | |
2967 | 4659 |
2138 | 4660 *data_size = sizeof(AVFrame); |
2967 | 4661 |
2335 | 4662 bytes_read= c->bytestream - c->bytestream_start; |
4663 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
2138 | 4664 |
4665 return bytes_read; | |
4666 } | |
4667 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4668 static av_cold int decode_end(AVCodecContext *avctx) |
2138 | 4669 { |
4670 SnowContext *s = avctx->priv_data; | |
4671 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4672 slice_buffer_destroy(&s->sb); |
2967 | 4673 |
2138 | 4674 common_end(s); |
4675 | |
4676 return 0; | |
4677 } | |
4678 | |
4679 AVCodec snow_decoder = { | |
4680 "snow", | |
4681 CODEC_TYPE_VIDEO, | |
4682 CODEC_ID_SNOW, | |
4683 sizeof(SnowContext), | |
4684 decode_init, | |
4685 NULL, | |
4686 decode_end, | |
4687 decode_frame, | |
4688 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
6712 | 4689 NULL, |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6712
diff
changeset
|
4690 .long_name = NULL_IF_CONFIG_SMALL("Snow"), |
2138 | 4691 }; |
4692 | |
8590 | 4693 #if CONFIG_SNOW_ENCODER |
2138 | 4694 AVCodec snow_encoder = { |
4695 "snow", | |
4696 CODEC_TYPE_VIDEO, | |
4697 CODEC_ID_SNOW, | |
4698 sizeof(SnowContext), | |
4699 encode_init, | |
4700 encode_frame, | |
4701 encode_end, | |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6712
diff
changeset
|
4702 .long_name = NULL_IF_CONFIG_SMALL("Snow"), |
2138 | 4703 }; |
2408
a6e4da1c28ee
Disable encoders patch by (Gianluigi Tiesi <mplayer netfarm it>)
michael
parents:
2368
diff
changeset
|
4704 #endif |
2138 | 4705 |
4706 | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4707 #ifdef TEST |
2138 | 4708 #undef malloc |
4709 #undef free | |
4710 #undef printf | |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4711 |
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4712 #include "libavutil/lfg.h" |
2138 | 4713 |
5934 | 4714 int main(void){ |
2138 | 4715 int width=256; |
4716 int height=256; | |
4717 int buffer[2][width*height]; | |
4718 SnowContext s; | |
4719 int i; | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4720 AVLFG prng; |
2138 | 4721 s.spatial_decomposition_count=6; |
4722 s.spatial_decomposition_type=1; | |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4723 |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4724 av_lfg_init(&prng, 1); |
2967 | 4725 |
2138 | 4726 printf("testing 5/3 DWT\n"); |
4727 for(i=0; i<width*height; i++) | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4728 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; |
2967 | 4729 |
2951 | 4730 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4731 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4732 |
2138 | 4733 for(i=0; i<width*height; i++) |
9197 | 4734 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); |
2138 | 4735 |
4736 printf("testing 9/7 DWT\n"); | |
4737 s.spatial_decomposition_type=0; | |
4738 for(i=0; i<width*height; i++) | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4739 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; |
2967 | 4740 |
2951 | 4741 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4742 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4743 |
2138 | 4744 for(i=0; i<width*height; i++) |
9197 | 4745 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); |
2967 | 4746 |
2951 | 4747 #if 0 |
2138 | 4748 printf("testing AC coder\n"); |
4749 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4750 ff_init_range_encoder(&s.c, buffer[0], 256*256); |
2138 | 4751 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4752 |
2138 | 4753 for(i=-256; i<256; i++){ |
4001 | 4754 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); |
2138 | 4755 } |
2335 | 4756 ff_rac_terminate(&s.c); |
2138 | 4757 |
4758 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4759 ff_init_range_decoder(&s.c, buffer[0], 256*256); |
2138 | 4760 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4761 |
2138 | 4762 for(i=-256; i<256; i++){ |
4763 int j; | |
4764 j= get_symbol(&s.c, s.header_state, 1); | |
4001 | 4765 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); |
2138 | 4766 } |
2951 | 4767 #endif |
6414 | 4768 { |
4769 int level, orientation, x, y; | |
4770 int64_t errors[8][4]; | |
4771 int64_t g=0; | |
4772 | |
4773 memset(errors, 0, sizeof(errors)); | |
4774 s.spatial_decomposition_count=3; | |
4775 s.spatial_decomposition_type=0; | |
4776 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4777 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4778 int w= width >> (s.spatial_decomposition_count-level); | |
4779 int h= height >> (s.spatial_decomposition_count-level); | |
4780 int stride= width << (s.spatial_decomposition_count-level); | |
4781 DWTELEM *buf= buffer[0]; | |
4782 int64_t error=0; | |
4783 | |
4784 if(orientation&1) buf+=w; | |
4785 if(orientation>1) buf+=stride>>1; | |
4786 | |
4787 memset(buffer[0], 0, sizeof(int)*width*height); | |
4788 buf[w/2 + h/2*stride]= 256*256; | |
4789 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
4790 for(y=0; y<height; y++){ | |
4791 for(x=0; x<width; x++){ | |
4792 int64_t d= buffer[0][x + y*width]; | |
4793 error += d*d; | |
4794 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); | |
4795 } | |
4796 if(FFABS(height/2-y)<9 && level==2) printf("\n"); | |
2138 | 4797 } |
6414 | 4798 error= (int)(sqrt(error)+0.5); |
4799 errors[level][orientation]= error; | |
8611 | 4800 if(g) g=av_gcd(g, error); |
6414 | 4801 else g= error; |
2138 | 4802 } |
4803 } | |
6414 | 4804 printf("static int const visual_weight[][4]={\n"); |
4805 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4806 printf(" {"); | |
4807 for(orientation=0; orientation<4; orientation++){ | |
4808 printf("%8"PRId64",", errors[level][orientation]/g); | |
4809 } | |
4810 printf("},\n"); | |
2138 | 4811 } |
6414 | 4812 printf("};\n"); |
4813 { | |
2138 | 4814 int level=2; |
4815 int w= width >> (s.spatial_decomposition_count-level); | |
6168 | 4816 //int h= height >> (s.spatial_decomposition_count-level); |
2138 | 4817 int stride= width << (s.spatial_decomposition_count-level); |
4818 DWTELEM *buf= buffer[0]; | |
4819 int64_t error=0; | |
4820 | |
4821 buf+=w; | |
4822 buf+=stride>>1; | |
2967 | 4823 |
2138 | 4824 memset(buffer[0], 0, sizeof(int)*width*height); |
4825 #if 1 | |
4826 for(y=0; y<height; y++){ | |
4827 for(x=0; x<width; x++){ | |
4828 int tab[4]={0,2,3,1}; | |
4829 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; | |
4830 } | |
4831 } | |
2951 | 4832 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4833 #else |
4834 for(y=0; y<h; y++){ | |
4835 for(x=0; x<w; x++){ | |
4836 buf[x + y*stride ]=169; | |
4837 buf[x + y*stride-w]=64; | |
4838 } | |
4839 } | |
2951 | 4840 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4841 #endif |
4842 for(y=0; y<height; y++){ | |
4843 for(x=0; x<width; x++){ | |
4844 int64_t d= buffer[0][x + y*width]; | |
4845 error += d*d; | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4846 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); |
2138 | 4847 } |
4001 | 4848 if(FFABS(height/2-y)<9) printf("\n"); |
2138 | 4849 } |
6414 | 4850 } |
4851 | |
2138 | 4852 } |
4853 return 0; | |
4854 } | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4855 #endif /* TEST */ |