Mercurial > libavcodec.hg
annotate zmbvenc.c @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than
regular MMX code. Examples of this are the Core1 CPU. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | 7dd2a45249a9 |
children |
rev | line source |
---|---|
4285 | 1 /* |
2 * Zip Motion Blocks Video (ZMBV) encoder | |
3 * Copyright (c) 2006 Konstantin Shishkov | |
4 * | |
5 * This file is part of FFmpeg. | |
6 * | |
7 * FFmpeg is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public | |
9 * License as published by the Free Software Foundation; either | |
10 * version 2.1 of the License, or (at your option) any later version. | |
11 * | |
12 * FFmpeg is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
18 * License along with FFmpeg; if not, write to the Free Software | |
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
20 */ | |
21 | |
22 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11560
diff
changeset
|
23 * @file |
4285 | 24 * Zip Motion Blocks Video encoder |
25 */ | |
26 | |
27 #include <stdio.h> | |
28 #include <stdlib.h> | |
29 | |
8573
2acf0ae7b041
Fix build: Add intreadwrite.h and bswap.h #includes where necessary.
diego
parents:
7886
diff
changeset
|
30 #include "libavutil/intreadwrite.h" |
4285 | 31 #include "avcodec.h" |
32 | |
33 #include <zlib.h> | |
34 | |
/* Bits of the flag byte that starts every encoded frame */
#define ZMBV_KEYFRAME 1 ///< keyframe: stream description and the full image follow
#define ZMBV_DELTAPAL 2 ///< the frame carries a palette delta

/* Edge length in pixels of the square blocks used for motion search */
#define ZMBV_BLOCK 16
39 | |
40 /** | |
41 * Encoder context | |
42 */ | |
43 typedef struct ZmbvEncContext { | |
44 AVCodecContext *avctx; | |
45 AVFrame pic; | |
46 | |
47 int range; | |
48 uint8_t *comp_buf, *work_buf; | |
49 uint8_t pal[768]; | |
50 uint32_t pal2[256]; //for quick comparisons | |
51 uint8_t *prev; | |
52 int pstride; | |
53 int comp_size; | |
54 int keyint, curfrm; | |
55 z_stream zstream; | |
56 } ZmbvEncContext; | |
57 | |
/**
 * Cost table used by block_cmp(): entry n is the score added for an XOR value
 * that occurs n times inside a block. Entry 0 is never written and stays 0;
 * entries 1..255 are filled in by encode_init().
 */
static int score_tab[256];
59 | |
4285 | 60 /** Block comparing function |
61 * XXX should be optimized and moved to DSPContext | |
62 * TODO handle out of edge ME | |
63 */ | |
10544
b9fdb6b4c2dc
cosmetics: break some long lines and insert few spaces
kostya
parents:
10543
diff
changeset
|
64 static inline int block_cmp(uint8_t *src, int stride, uint8_t *src2, int stride2, |
b9fdb6b4c2dc
cosmetics: break some long lines and insert few spaces
kostya
parents:
10543
diff
changeset
|
65 int bw, int bh, int *xored) |
4285 | 66 { |
67 int sum = 0; | |
68 int i, j; | |
10544
b9fdb6b4c2dc
cosmetics: break some long lines and insert few spaces
kostya
parents:
10543
diff
changeset
|
69 uint8_t histogram[256] = {0}; |
4285 | 70 |
10543
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
71 *xored = 0; |
4285 | 72 for(j = 0; j < bh; j++){ |
10543
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
73 for(i = 0; i < bw; i++){ |
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
74 int t = src[i] ^ src2[i]; |
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
75 histogram[t]++; |
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
76 *xored |= t; |
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
77 } |
4285 | 78 src += stride; |
79 src2 += stride2; | |
80 } | |
6758 | 81 |
10544
b9fdb6b4c2dc
cosmetics: break some long lines and insert few spaces
kostya
parents:
10543
diff
changeset
|
82 for(i = 1; i < 256; i++) |
b9fdb6b4c2dc
cosmetics: break some long lines and insert few spaces
kostya
parents:
10543
diff
changeset
|
83 sum += score_tab[histogram[i]]; |
6758 | 84 |
4285 | 85 return sum; |
86 } | |
87 | |
88 /** Motion estimation function | |
89 * TODO make better ME decisions | |
90 */ | |
10544
b9fdb6b4c2dc
cosmetics: break some long lines and insert few spaces
kostya
parents:
10543
diff
changeset
|
91 static int zmbv_me(ZmbvEncContext *c, uint8_t *src, int sstride, uint8_t *prev, |
b9fdb6b4c2dc
cosmetics: break some long lines and insert few spaces
kostya
parents:
10543
diff
changeset
|
92 int pstride, int x, int y, int *mx, int *my, int *xored) |
4285 | 93 { |
4647 | 94 int dx, dy, tx, ty, tv, bv, bw, bh; |
4285 | 95 |
96 *mx = *my = 0; | |
4647 | 97 bw = FFMIN(ZMBV_BLOCK, c->avctx->width - x); |
98 bh = FFMIN(ZMBV_BLOCK, c->avctx->height - y); | |
10543
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
99 bv = block_cmp(src, sstride, prev, pstride, bw, bh, xored); |
4285 | 100 if(!bv) return 0; |
4647 | 101 for(ty = FFMAX(y - c->range, 0); ty < FFMIN(y + c->range, c->avctx->height - bh); ty++){ |
102 for(tx = FFMAX(x - c->range, 0); tx < FFMIN(x + c->range, c->avctx->width - bw); tx++){ | |
4285 | 103 if(tx == x && ty == y) continue; // we already tested this block |
104 dx = tx - x; | |
105 dy = ty - y; | |
10543
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
106 tv = block_cmp(src, sstride, prev + dx + dy*pstride, pstride, bw, bh, xored); |
4285 | 107 if(tv < bv){ |
108 bv = tv; | |
109 *mx = dx; | |
110 *my = dy; | |
111 if(!bv) return 0; | |
112 } | |
113 } | |
114 } | |
115 return bv; | |
116 } | |
117 | |
118 static int encode_frame(AVCodecContext *avctx, uint8_t *buf, int buf_size, void *data) | |
119 { | |
4827 | 120 ZmbvEncContext * const c = avctx->priv_data; |
4285 | 121 AVFrame *pict = data; |
122 AVFrame * const p = &c->pic; | |
123 uint8_t *src, *prev; | |
124 uint32_t *palptr; | |
125 int len = 0; | |
126 int keyframe, chpal; | |
127 int fl; | |
128 int work_size = 0; | |
129 int bw, bh; | |
130 int i, j; | |
131 | |
132 keyframe = !c->curfrm; | |
4287 | 133 c->curfrm++; |
4285 | 134 if(c->curfrm == c->keyint) |
135 c->curfrm = 0; | |
136 *p = *pict; | |
137 p->pict_type= keyframe ? FF_I_TYPE : FF_P_TYPE; | |
138 p->key_frame= keyframe; | |
139 chpal = !keyframe && memcmp(p->data[1], c->pal2, 1024); | |
140 | |
141 fl = (keyframe ? ZMBV_KEYFRAME : 0) | (chpal ? ZMBV_DELTAPAL : 0); | |
142 *buf++ = fl; len++; | |
143 if(keyframe){ | |
144 deflateReset(&c->zstream); | |
145 *buf++ = 0; len++; // hi ver | |
146 *buf++ = 1; len++; // lo ver | |
147 *buf++ = 1; len++; // comp | |
148 *buf++ = 4; len++; // format - 8bpp | |
149 *buf++ = ZMBV_BLOCK; len++; // block width | |
150 *buf++ = ZMBV_BLOCK; len++; // block height | |
151 } | |
152 palptr = (uint32_t*)p->data[1]; | |
153 src = p->data[0]; | |
154 prev = c->prev; | |
155 if(chpal){ | |
156 uint8_t tpal[3]; | |
157 for(i = 0; i < 256; i++){ | |
5089 | 158 AV_WB24(tpal, palptr[i]); |
4285 | 159 c->work_buf[work_size++] = tpal[0] ^ c->pal[i * 3 + 0]; |
160 c->work_buf[work_size++] = tpal[1] ^ c->pal[i * 3 + 1]; | |
161 c->work_buf[work_size++] = tpal[2] ^ c->pal[i * 3 + 2]; | |
162 c->pal[i * 3 + 0] = tpal[0]; | |
163 c->pal[i * 3 + 1] = tpal[1]; | |
164 c->pal[i * 3 + 2] = tpal[2]; | |
165 } | |
166 memcpy(c->pal2, p->data[1], 1024); | |
167 } | |
168 if(keyframe){ | |
169 for(i = 0; i < 256; i++){ | |
5089 | 170 AV_WB24(c->pal+(i*3), palptr[i]); |
4285 | 171 } |
172 memcpy(c->work_buf, c->pal, 768); | |
173 memcpy(c->pal2, p->data[1], 1024); | |
174 work_size = 768; | |
175 for(i = 0; i < avctx->height; i++){ | |
176 memcpy(c->work_buf + work_size, src, avctx->width); | |
177 src += p->linesize[0]; | |
178 work_size += avctx->width; | |
179 } | |
180 }else{ | |
10543
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
181 int x, y, bh2, bw2, xored; |
4285 | 182 uint8_t *tsrc, *tprev; |
183 uint8_t *mv; | |
184 int mx, my, bv; | |
185 | |
186 bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK; | |
187 bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK; | |
188 mv = c->work_buf + work_size; | |
189 memset(c->work_buf + work_size, 0, (bw * bh * 2 + 3) & ~3); | |
190 work_size += (bw * bh * 2 + 3) & ~3; | |
191 /* for now just XOR'ing */ | |
192 for(y = 0; y < avctx->height; y += ZMBV_BLOCK) { | |
193 bh2 = FFMIN(avctx->height - y, ZMBV_BLOCK); | |
194 for(x = 0; x < avctx->width; x += ZMBV_BLOCK, mv += 2) { | |
195 bw2 = FFMIN(avctx->width - x, ZMBV_BLOCK); | |
196 | |
197 tsrc = src + x; | |
198 tprev = prev + x; | |
199 | |
10543
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
200 bv = zmbv_me(c, tsrc, p->linesize[0], tprev, c->pstride, x, y, &mx, &my, &xored); |
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
201 mv[0] = (mx << 1) | !!xored; |
4285 | 202 mv[1] = my << 1; |
203 tprev += mx + my * c->pstride; | |
10543
7ff7a34848bf
10l trocadero: ZMBV encoder used zero score to detect whether block should be
kostya
parents:
10397
diff
changeset
|
204 if(xored){ |
4285 | 205 for(j = 0; j < bh2; j++){ |
206 for(i = 0; i < bw2; i++) | |
207 c->work_buf[work_size++] = tsrc[i] ^ tprev[i]; | |
208 tsrc += p->linesize[0]; | |
209 tprev += c->pstride; | |
210 } | |
211 } | |
212 } | |
213 src += p->linesize[0] * ZMBV_BLOCK; | |
214 prev += c->pstride * ZMBV_BLOCK; | |
215 } | |
216 } | |
217 /* save the previous frame */ | |
218 src = p->data[0]; | |
219 prev = c->prev; | |
220 for(i = 0; i < avctx->height; i++){ | |
221 memcpy(prev, src, avctx->width); | |
222 prev += c->pstride; | |
223 src += p->linesize[0]; | |
224 } | |
225 | |
226 c->zstream.next_in = c->work_buf; | |
227 c->zstream.avail_in = work_size; | |
228 c->zstream.total_in = 0; | |
229 | |
230 c->zstream.next_out = c->comp_buf; | |
231 c->zstream.avail_out = c->comp_size; | |
232 c->zstream.total_out = 0; | |
9621 | 233 if(deflate(&c->zstream, Z_SYNC_FLUSH) != Z_OK){ |
4285 | 234 av_log(avctx, AV_LOG_ERROR, "Error compressing data\n"); |
235 return -1; | |
236 } | |
237 | |
238 memcpy(buf, c->comp_buf, c->zstream.total_out); | |
239 return len + c->zstream.total_out; | |
240 } | |
241 | |
242 | |
243 /** | |
244 * Init zmbv encoder | |
245 */ | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
5215
diff
changeset
|
246 static av_cold int encode_init(AVCodecContext *avctx) |
4285 | 247 { |
4827 | 248 ZmbvEncContext * const c = avctx->priv_data; |
4285 | 249 int zret; // Zlib return code |
6758 | 250 int i; |
4285 | 251 int lvl = 9; |
252 | |
6758 | 253 for(i=1; i<256; i++) |
6818 | 254 score_tab[i]= -i * log(i/(double)(ZMBV_BLOCK*ZMBV_BLOCK)) * (256/M_LN2); |
6758 | 255 |
4285 | 256 c->avctx = avctx; |
257 | |
258 c->curfrm = 0; | |
259 c->keyint = avctx->keyint_min; | |
260 c->range = 8; | |
261 if(avctx->me_range > 0) | |
262 c->range = FFMIN(avctx->me_range, 127); | |
263 | |
264 if(avctx->compression_level >= 0) | |
265 lvl = avctx->compression_level; | |
266 if(lvl < 0 || lvl > 9){ | |
267 av_log(avctx, AV_LOG_ERROR, "Compression level should be 0-9, not %i\n", lvl); | |
268 return -1; | |
269 } | |
270 | |
271 // Needed if zlib unused or init aborted before deflateInit | |
272 memset(&(c->zstream), 0, sizeof(z_stream)); | |
273 c->comp_size = avctx->width * avctx->height + 1024 + | |
274 ((avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK) * ((avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK) * 2 + 4; | |
275 if ((c->work_buf = av_malloc(c->comp_size)) == NULL) { | |
276 av_log(avctx, AV_LOG_ERROR, "Can't allocate work buffer.\n"); | |
277 return -1; | |
278 } | |
279 /* Conservative upper bound taken from zlib v1.2.1 source via lcl.c */ | |
280 c->comp_size = c->comp_size + ((c->comp_size + 7) >> 3) + | |
281 ((c->comp_size + 63) >> 6) + 11; | |
282 | |
283 /* Allocate compression buffer */ | |
284 if ((c->comp_buf = av_malloc(c->comp_size)) == NULL) { | |
285 av_log(avctx, AV_LOG_ERROR, "Can't allocate compression buffer.\n"); | |
286 return -1; | |
287 } | |
9686
bc32976d6d9d
Move ALIGN macro to libavutil/common.h and use it in various places
conrad
parents:
9621
diff
changeset
|
288 c->pstride = FFALIGN(avctx->width, 16); |
4285 | 289 if ((c->prev = av_malloc(c->pstride * avctx->height)) == NULL) { |
290 av_log(avctx, AV_LOG_ERROR, "Can't allocate picture.\n"); | |
291 return -1; | |
292 } | |
293 | |
294 c->zstream.zalloc = Z_NULL; | |
295 c->zstream.zfree = Z_NULL; | |
296 c->zstream.opaque = Z_NULL; | |
297 zret = deflateInit(&(c->zstream), lvl); | |
298 if (zret != Z_OK) { | |
299 av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret); | |
300 return -1; | |
301 } | |
302 | |
7886 | 303 avctx->coded_frame = (AVFrame*)&c->pic; |
304 | |
4285 | 305 return 0; |
306 } | |
307 | |
308 | |
309 | |
310 /** | |
4368 | 311 * Uninit zmbv encoder |
4285 | 312 */ |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
5215
diff
changeset
|
313 static av_cold int encode_end(AVCodecContext *avctx) |
4285 | 314 { |
4827 | 315 ZmbvEncContext * const c = avctx->priv_data; |
4285 | 316 |
317 av_freep(&c->comp_buf); | |
318 av_freep(&c->work_buf); | |
319 | |
320 deflateEnd(&(c->zstream)); | |
321 av_freep(&c->prev); | |
322 | |
323 return 0; | |
324 } | |
325 | |
326 AVCodec zmbv_encoder = { | |
327 "zmbv", | |
11560
8a4984c5cacc
Define AVMediaType enum, and use it instead of enum CodecType, which
stefano
parents:
10544
diff
changeset
|
328 AVMEDIA_TYPE_VIDEO, |
4285 | 329 CODEC_ID_ZMBV, |
330 sizeof(ZmbvEncContext), | |
331 encode_init, | |
332 encode_frame, | |
333 encode_end, | |
10146
38cfe222e1a4
Mark all pix_fmts and supported_framerates compound literals as const.
reimar
parents:
9686
diff
changeset
|
334 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_PAL8, PIX_FMT_NONE}, |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6818
diff
changeset
|
335 .long_name = NULL_IF_CONFIG_SMALL("Zip Motion Blocks Video"), |
4285 | 336 }; |