libavcodec.hg: snow.c annotate

annotate snow.c @ 5596:051caa9c1ba5 libavcodec

simplify senselessly complex addressing

author	michael
date	Sun, 26 Aug 2007 02:02:14 +0000
parents	946c2db0a093
children	a920d9b58f19

rev	line source
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1 /*
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3920 diff changeset	4 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3920 diff changeset	5 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3920 diff changeset	6 * FFmpeg is free software; you can redistribute it and/or
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	7 * modify it under the terms of the GNU Lesser General Public
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	8 * License as published by the Free Software Foundation; either
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3920 diff changeset	9 * version 2.1 of the License, or (at your option) any later version.
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	10 *
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3920 diff changeset	11 * FFmpeg is distributed in the hope that it will be useful,
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	14 * Lesser General Public License for more details.
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	15 *
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	16 * You should have received a copy of the GNU Lesser General Public
3947 c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library' diego parents: 3920 diff changeset	17 * License along with FFmpeg; if not, write to the Free Software
3036 0b546eab515d Update licensing information: The FSF changed postal address. diego parents: 3035 diff changeset	18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	19 */
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	20
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	21 #include "avcodec.h"
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	22 #include "dsputil.h"
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	23 #include "snow.h"
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	24
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	25 #include "rangecoder.h"
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	26
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	27 #include "mpegvideo.h"
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	28
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	29 #undef NDEBUG
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	30 #include <assert.h>
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	31
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	32 static const int8_t quant3[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	49 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	50 static const int8_t quant3b[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	67 };
2596 3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	68 static const int8_t quant3bA[256]={
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	85 };
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	86 static const int8_t quant5[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	103 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	104 static const int8_t quant7[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	121 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	122 static const int8_t quant9[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	139 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	140 static const int8_t quant11[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	157 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	158 static const int8_t quant13[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	175 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	176
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	177 #if 0 //64*cubic
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	178 static const uint8_t obmc32[1024]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	211 //error:0.000022
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	212 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	213 static const uint8_t obmc16[256]={
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	230 //error:0.000033
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	231 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	232 #elif 1 // 64*linear
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	233 static const uint8_t obmc32[1024]={ ///< 32 rows x 32 entries of 8-bit weights; NOTE(review): this branch is labelled "64*linear" but entries peak at 240 - confirm the scale
3206 c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	266 //error:0.000020
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	267 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	268 static const uint8_t obmc16[256]={ ///< 16 rows x 16 entries of 8-bit weights; entries peak at 224 in this branch
3206 c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	285 //error:0.000015
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	286 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	287 #else //64*cos
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	288 static const uint8_t obmc32[1024]={ ///< 32 rows x 32 entries, "64*cos" variant peaking at 64; sits in the #else after "#elif 1", so it is not compiled as written
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	321 //error:0.000022
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	322 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	323 static const uint8_t obmc16[256]={ ///< 16 rows x 16 entries, cosine variant peaking at 63; sits in the #else after "#elif 1", so it is not compiled as written
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	340 //error:0.000022
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	341 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	342 #endif
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	343
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	344 //linear *64 -- NOTE(review): entries below peak at 196, so the "*64" scale in this label looks stale; confirm
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	345 static const uint8_t obmc8[64]={ ///< 8 rows x 8 entries of 8-bit weights
3206 c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	346 4, 12, 20, 28, 28, 20, 12, 4,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	347 12, 36, 60, 84, 84, 60, 36, 12,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	348 20, 60,100,140,140,100, 60, 20,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	349 28, 84,140,196,196,140, 84, 28,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	350 28, 84,140,196,196,140, 84, 28,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	351 20, 60,100,140,140,100, 60, 20,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	352 12, 36, 60, 84, 84, 60, 36, 12,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	353 4, 12, 20, 28, 28, 20, 12, 4,
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	354 //error:0.000000
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	355 };
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	356
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	357 //linear *64 -- NOTE(review): entries below peak at 144, so the "*64" scale in this label looks stale; confirm
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	358 static const uint8_t obmc4[16]={ ///< 4 rows x 4 entries of 8-bit weights
3206 c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	359 16, 48, 48, 16,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	360 48,144,144, 48,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	361 48,144,144, 48,
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	362 16, 48, 48, 16,
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	363 //error:0.000000
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	364 };
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	365
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	366 static const uint8_t *obmc_tab[4]={ ///< the four weight tables, ordered from largest to smallest
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	367 obmc32, obmc16, obmc8, obmc4 // index 0: 1024 entries, 1: 256, 2: 64, 3: 16
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	368 };
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	369
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; ///< square int table indexed by two reference-frame indices; zero until written (static storage). Presumably MV scale factors between reference pairs - TODO confirm where it is filled
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	371
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	372 typedef struct BlockNode{ ///< per-block record: two 16-bit values, a reference index, three color bytes, a type and a level
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	373 int16_t mx; ///< presumably the horizontal motion-vector component - TODO confirm units
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	374 int16_t my; ///< presumably the vertical motion-vector component - TODO confirm units
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	375 uint8_t ref; ///< presumably the index of the reference frame used - TODO confirm
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	376 uint8_t color[3]; ///< three color bytes; null_block sets each to 128
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	377 uint8_t type; ///< presumably a combination of the BLOCK_* values defined just below - TODO confirm
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	378 //#define TYPE_SPLIT 1
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	379 #define BLOCK_INTRA 1
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	380 #define BLOCK_OPT 2
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	381 //#define TYPE_NOCOLOR 4
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	382 uint8_t level; //FIXME merge into type?
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	383 }BlockNode;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	384
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	385 static const BlockNode null_block= { //FIXME add border maybe; constant default node: color bytes 128, every other field 0
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	386 .color= {128,128,128}, ///< midpoint of the 0..255 byte range for all three color bytes
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	387 .mx= 0,
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	388 .my= 0,
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	389 .ref= 0,
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	390 .type= 0, ///< neither BLOCK_INTRA nor BLOCK_OPT
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	391 .level= 0,
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	392 };
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	393
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	394 #define LOG2_MB_SIZE 4 ///< base-2 logarithm of MB_SIZE
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	395 #define MB_SIZE (1<<LOG2_MB_SIZE) ///< evaluates to 16
5575 a6f5ed050335 use more bits on the encoder side michael parents: 5572 diff changeset	396 #define ENCODER_EXTRA_BITS 4 ///< NOTE(review): its use is not visible in this part of the file; presumably extra precision bits on the encoder side - confirm
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	397
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	398 typedef struct x_and_coeff{ ///< pairs a signed 16-bit x with an unsigned 16-bit coeff
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	399 int16_t x; ///< presumably the horizontal position of the coefficient - TODO confirm
2596 3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	400 uint16_t coeff; ///< presumably the coefficient value (unsigned) - TODO confirm how the sign is carried
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	401 } x_and_coeff;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	402
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	403 typedef struct SubBand{ ///< one subband: geometry, quantizer log, coefficient buffers, parent link and context states
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	404 int level; ///< presumably the decomposition level this band belongs to - TODO confirm
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	405 int stride; ///< presumably the element distance between rows of buf/ibuf - TODO confirm
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	406 int width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	407 int height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	408 int qlog; ///< log(qscale)/log[2^(1/6)]
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	409 DWTELEM *buf; ///< coefficient storage in DWTELEM precision
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	410 IDWTELEM *ibuf; ///< coefficient storage in IDWTELEM precision (presumably the inverse-transform side - TODO confirm)
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	411 int buf_x_offset;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	412 int buf_y_offset;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	413 int stride_line; ///< Stride measured in lines, not pixels.
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	414 x_and_coeff * x_coeff; ///< array of (x, coeff) pairs; ownership and length are not visible here
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	415 struct SubBand *parent; ///< link to another SubBand (presumably the matching band one level coarser - TODO confirm)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	416 uint8_t state[/*7*2*/ 7 + 512][32]; ///< 519 rows of 32 state bytes; the leading comment is an older dimension kept for reference
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	417 }SubBand;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	418
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	419 typedef struct Plane{ ///< one plane: its dimensions plus its SubBand array
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	420 int width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	421 int height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	422 SubBand band[MAX_DECOMPOSITIONS][4]; ///< first index bounded by MAX_DECOMPOSITIONS, second selects one of 4 bands (presumably level and orientation - TODO confirm)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	423 }Plane;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	424
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	425 typedef struct SnowContext{
4588 fc155ff94878 cosmetics: Fix another common typo, dependAnt --> dependEnt. diego parents: 4494 diff changeset	426 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	427
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	428 AVCodecContext *avctx;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	429 RangeCoder c;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	430 DSPContext dsp;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	431 AVFrame new_picture;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	432 AVFrame input_picture; ///< new_picture with the internal linesizes
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	433 AVFrame current_picture;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	434 AVFrame last_picture[MAX_REF_FRAMES];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	435 AVFrame mconly_picture;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	436 // uint8_t q_context[16];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	437 uint8_t header_state[32];
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	438 uint8_t block_state[128 + 32*128];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	439 int keyframe;
2199 e0b08bdf565d 10l (keyframes and context resets) michael parents: 2198 diff changeset	440 int always_reset;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	441 int version;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	442 int spatial_decomposition_type;
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	443 int last_spatial_decomposition_type;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	444 int temporal_decomposition_type;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	445 int spatial_decomposition_count;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	446 int temporal_decomposition_count;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	447 int max_ref_frames;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	448 int ref_frames;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	449 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	450 uint32_t *ref_scores[MAX_REF_FRAMES];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	451 DWTELEM *spatial_dwt_buffer;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	452 IDWTELEM *spatial_idwt_buffer;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	453 int colorspace_type;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	454 int chroma_h_shift;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	455 int chroma_v_shift;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	456 int spatial_scalability;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	457 int qlog;
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	458 int last_qlog;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	459 int lambda;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	460 int lambda2;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	461 int pass1_rc;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	462 int mv_scale;
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	463 int last_mv_scale;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	464 int qbias;
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	465 int last_qbias;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	466 #define QBIAS_SHIFT 3
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	467 int b_width;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	468 int b_height;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	469 int block_max_depth;
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	470 int last_block_max_depth;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	471 Plane plane[MAX_PLANES];
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	472 BlockNode *block;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	473 #define ME_CACHE_SIZE 1024
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	474 int me_cache[ME_CACHE_SIZE];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	475 int me_cache_generation;
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	476 slice_buffer sb;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	477
4588 fc155ff94878 cosmetics: Fix another common typo, dependAnt --> dependEnt. diego parents: 4494 diff changeset	478 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	479 }SnowContext;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	480
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	481 typedef struct {
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	482 IDWTELEM *b0;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	483 IDWTELEM *b1;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	484 IDWTELEM *b2;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	485 IDWTELEM *b3;
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	486 int y;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	487 } dwt_compose_t;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	488
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	489 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	490 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	491
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	492 static void iterative_me(SnowContext *s);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	493
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	494 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	495 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	496 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	497
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	498 buf->base_buffer = base_buffer;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	499 buf->line_count = line_count;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	500 buf->line_width = line_width;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	501 buf->data_count = max_allocated_lines;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	502 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	503 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	504
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	505 for (i = 0; i < max_allocated_lines; i++)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	506 {
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	507 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	508 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	509
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	510 buf->data_stack_top = max_allocated_lines - 1;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	511 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	512
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	513 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	514 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	515 int offset;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	516 IDWTELEM * buffer;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	517
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	518 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	519
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	520 assert(buf->data_stack_top >= 0);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	521 // assert(!buf->line[line]);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	522 if (buf->line[line])
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	523 return buf->line[line];
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	524
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	525 offset = buf->line_width * line;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	526 buffer = buf->data_stack[buf->data_stack_top];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	527 buf->data_stack_top--;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	528 buf->line[line] = buffer;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	529
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	530 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	531
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	532 return buffer;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	533 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	534
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	535 static void slice_buffer_release(slice_buffer * buf, int line)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	536 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	537 int offset;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	538 IDWTELEM * buffer;
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	539
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	540 assert(line >= 0 && line < buf->line_count);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	541 assert(buf->line[line]);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	542
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	543 offset = buf->line_width * line;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	544 buffer = buf->line[line];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	545 buf->data_stack_top++;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	546 buf->data_stack[buf->data_stack_top] = buffer;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	547 buf->line[line] = NULL;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	548
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	549 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	550 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	551
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	552 static void slice_buffer_flush(slice_buffer * buf)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	553 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	554 int i;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	555 for (i = 0; i < buf->line_count; i++)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	556 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	557 if (buf->line[i])
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	558 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	559 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	560 slice_buffer_release(buf, i);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	561 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	562 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	563 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	564
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	565 static void slice_buffer_destroy(slice_buffer * buf)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	566 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	567 int i;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	568 slice_buffer_flush(buf);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	569
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	570 for (i = buf->data_count - 1; i >= 0; i--)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	571 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	572 assert(buf->data_stack[i]);
3190 e9fa3ac61966 av_free -> av_freep michael parents: 3189 diff changeset	573 av_freep(&buf->data_stack[i]);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	574 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	575 assert(buf->data_stack);
3190 e9fa3ac61966 av_free -> av_freep michael parents: 3189 diff changeset	576 av_freep(&buf->data_stack);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	577 assert(buf->line);
3190 e9fa3ac61966 av_free -> av_freep michael parents: 3189 diff changeset	578 av_freep(&buf->line);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	579 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	580
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2975 diff changeset	581 #ifdef __sgi
2368 a7ac68734a91 fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be}) michael parents: 2335 diff changeset	582 // Avoid a name clash on SGI IRIX
2979 bfabfdf9ce55 COSMETICS: tabs --> spaces, some prettyprinting diego parents: 2975 diff changeset	583 #undef qexp
2368 a7ac68734a91 fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be}) michael parents: 2335 diff changeset	584 #endif
2246 3414ac0b8c55 8 -> FRAC_BITS michael parents: 2241 diff changeset	585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
2600 2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	586 static uint8_t qexp[QROOT];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	587
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	588 static inline int mirror(int v, int m){
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	589 while((unsigned)v > (unsigned)m){
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	590 v=-v;
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	591 if(v<0) v+= 2*m;
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	592 }
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	593 return v;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	594 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	595
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	597 int i;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	598
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	599 if(v){
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	600 const int a= FFABS(v);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	601 const int e= av_log2(a);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	602 #if 1
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	603 const int el= FFMIN(e, 10);
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	604 put_rac(c, state+0, 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	605
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	606 for(i=0; i<el; i++){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	607 put_rac(c, state+1+i, 1); //1..10
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	608 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	609 for(; i<e; i++){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	610 put_rac(c, state+1+9, 1); //1..10
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	611 }
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	612 put_rac(c, state+1+FFMIN(i,9), 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	613
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	614 for(i=e-1; i>=el; i--){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	615 put_rac(c, state+22+9, (a>>i)&1); //22..31
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	616 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	617 for(; i>=0; i--){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	618 put_rac(c, state+22+i, (a>>i)&1); //22..31
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	619 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	620
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	621 if(is_signed)
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	622 put_rac(c, state+11 + el, v < 0); //11..21
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	623 #else
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	624
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	625 put_rac(c, state+0, 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	626 if(e<=9){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	627 for(i=0; i<e; i++){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	628 put_rac(c, state+1+i, 1); //1..10
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	629 }
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	630 put_rac(c, state+1+i, 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	631
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	632 for(i=e-1; i>=0; i--){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	633 put_rac(c, state+22+i, (a>>i)&1); //22..31
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	634 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	635
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	636 if(is_signed)
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	637 put_rac(c, state+11 + e, v < 0); //11..21
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	638 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	639 for(i=0; i<e; i++){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	641 }
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	642 put_rac(c, state+1+FFMIN(i,9), 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	643
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	644 for(i=e-1; i>=0; i--){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	646 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	647
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	648 if(is_signed)
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	650 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	651 #endif
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	652 }else{
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	653 put_rac(c, state+0, 1);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	654 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	655 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	656
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	658 if(get_rac(c, state+0))
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	659 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	660 else{
2240 c46fed9b7575 simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>) michael parents: 2232 diff changeset	661 int i, e, a;
c46fed9b7575 simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>) michael parents: 2232 diff changeset	662 e= 0;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
2240 c46fed9b7575 simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>) michael parents: 2232 diff changeset	664 e++;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	665 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	666
2240 c46fed9b7575 simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>) michael parents: 2232 diff changeset	667 a= 1;
c46fed9b7575 simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>) michael parents: 2232 diff changeset	668 for(i=e-1; i>=0; i--){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
2240 c46fed9b7575 simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>) michael parents: 2232 diff changeset	670 }
c46fed9b7575 simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>) michael parents: 2232 diff changeset	671
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	673 return -a;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	674 else
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	675 return a;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	676 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	677 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	678
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	680 int i;
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	681 int r= log2>=0 ? 1<<log2 : 1;
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	682
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	683 assert(v>=0);
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	684 assert(log2>=-4);
7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	685
7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	686 while(v >= r){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	687 put_rac(c, state+4+log2, 1);
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	688 v -= r;
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	689 log2++;
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	690 if(log2>0) r+=r;
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	691 }
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	692 put_rac(c, state+4+log2, 0);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	693
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	694 for(i=log2-1; i>=0; i--){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	695 put_rac(c, state+31-i, (v>>i)&1);
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	696 }
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	697 }
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	698
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	700 int i;
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	701 int r= log2>=0 ? 1<<log2 : 1;
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	702 int v=0;
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	703
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	704 assert(log2>=-4);
7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	705
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	706 while(get_rac(c, state+4+log2)){
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	707 v+= r;
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	708 log2++;
2159 7f42295c1517 improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8) michael parents: 2156 diff changeset	709 if(log2>0) r+=r;
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	710 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	711
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	712 for(i=log2-1; i>=0; i--){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	713 v+= get_rac(c, state+31-i)<<i;
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	714 }
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	715
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	716 return v;
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	717 }
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	718
4283 d6f83e2f8804 rename always_inline to av_always_inline and move to common.h mru parents: 4197 diff changeset	719 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	720 const int mirror_left= !highpass;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	721 const int mirror_right= (width&1) ^ highpass;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	722 const int w= (width>>1) - 1 + (highpass & width);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	723 int i;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	724
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	725 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	726 if(mirror_left){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	727 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	728 dst += dst_step;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	729 src += src_step;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	730 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	731
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	732 for(i=0; i<w; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	733 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	734 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	735
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	736 if(mirror_right){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	737 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	738 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	739 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	740
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	741 static av_always_inline void inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	742 const int mirror_left= !highpass;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	743 const int mirror_right= (width&1) ^ highpass;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	744 const int w= (width>>1) - 1 + (highpass & width);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	745 int i;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	746
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	747 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	748 if(mirror_left){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	749 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	750 dst += dst_step;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	751 src += src_step;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	752 }
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	753
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	754 for(i=0; i<w; i++){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	755 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	756 }
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	757
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	758 if(mirror_right){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	759 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	760 }
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	761 }
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	762
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	763 #ifndef liftS
4283 d6f83e2f8804 rename always_inline to av_always_inline and move to common.h mru parents: 4197 diff changeset	764 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	765 const int mirror_left= !highpass;
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	766 const int mirror_right= (width&1) ^ highpass;
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	767 const int w= (width>>1) - 1 + (highpass & width);
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	768 int i;
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	769
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	770 assert(shift == 4);
5572 c6fac563ec28 simplify michael parents: 5565 diff changeset	771 #define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	772 if(mirror_left){
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	773 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	774 dst += dst_step;
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	775 src += src_step;
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	776 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	777
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	778 for(i=0; i<w; i++){
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	779 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	780 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	781
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	782 if(mirror_right){
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	783 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	784 }
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	785 }
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	786 static av_always_inline void inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	787 const int mirror_left= !highpass;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	788 const int mirror_right= (width&1) ^ highpass;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	789 const int w= (width>>1) - 1 + (highpass & width);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	790 int i;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	791
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	792 assert(shift == 4);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	793 #define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	794 if(mirror_left){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	795 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	796 dst += dst_step;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	797 src += src_step;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	798 }
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	799
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	800 for(i=0; i<w; i++){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	801 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	802 }
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	803
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	804 if(mirror_right){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	805 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	806 }
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	807 }
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	808 #endif
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	809
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	810 static void horizontal_decompose53i(DWTELEM *b, int width){
c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	811 DWTELEM temp[width];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	812 const int width2= width>>1;
2893 6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	813 int x;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	814 const int w2= (width+1)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	815
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	816 for(x=0; x<width2; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	817 temp[x ]= b[2*x ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	818 temp[x+w2]= b[2*x + 1];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	819 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	820 if(width&1)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	821 temp[x ]= b[2*x ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	822 #if 0
2893 6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	823 {
6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	824 int A1,A2,A3,A4;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	825 A2= temp[1 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	826 A4= temp[0 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	827 A1= temp[0+width2];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	828 A1 -= (A2 + A4)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	829 A4 += (A1 + 1)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	830 b[0+width2] = A1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	831 b[0 ] = A4;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	832 for(x=1; x+1<width2; x+=2){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	833 A3= temp[x+width2];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	834 A4= temp[x+1 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	835 A3 -= (A2 + A4)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	836 A2 += (A1 + A3 + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	837 b[x+width2] = A3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	838 b[x ] = A2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	839
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	840 A1= temp[x+1+width2];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	841 A2= temp[x+2 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	842 A1 -= (A2 + A4)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	843 A4 += (A1 + A3 + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	844 b[x+1+width2] = A1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	845 b[x+1 ] = A4;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	846 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	847 A3= temp[width-1];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	848 A3 -= A2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	849 A2 += (A1 + A3 + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	850 b[width -1] = A3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	851 b[width2-1] = A2;
2893 6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	852 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	853 #else
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	854 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	855 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	856 #endif
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	857 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	858
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	859 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	860 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	861
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	862 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	863 b1[i] -= (b0[i] + b2[i])>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	864 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	865 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	866
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	867 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	868 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	869
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	870 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	871 b1[i] += (b0[i] + b2[i] + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	872 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	873 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	874
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	875 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
2198 970c2de19b2e cleanup michael parents: 2197 diff changeset	876 int y;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	877 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	878 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	879
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	880 for(y=-2; y<height; y+=2){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	881 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	882 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	883
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	884 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	885 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	886 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	887 STOP_TIMER("horizontal_decompose53i")}
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	888
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	889 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	890 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	891 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	892 STOP_TIMER("vertical_decompose53i*")}
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	893
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	894 b0=b2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	895 b1=b3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	896 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	897 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	898
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	899 static void horizontal_decompose97i(DWTELEM *b, int width){
c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	900 DWTELEM temp[width];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	901 const int w2= (width+1)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	902
5565 93082c591c8b Change rounding of the horizontal DWT to match the vertical one. michael parents: 5551 diff changeset	903 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
93082c591c8b Change rounding of the horizontal DWT to match the vertical one. michael parents: 5551 diff changeset	904 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
5589 946c2db0a093 cleanup (remove some old experimentation related code) michael parents: 5588 diff changeset	905 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	906 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	907 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	908
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	909
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	910 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	911 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	912
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	913 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	914 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	915 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	916 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	917
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	918 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	919 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	920
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	921 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	922 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	923 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	924 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	925
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	926 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	927 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	928
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	929 for(i=0; i<width; i++){
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	930 #ifdef liftS
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	931 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	932 #else
5565 93082c591c8b Change rounding of the horizontal DWT to match the vertical one. michael parents: 5551 diff changeset	933 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	934 #endif
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	935 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	936 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	937
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	938 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	939 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	940
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	941 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	942 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	943 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	944 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	945
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	946 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
2198 970c2de19b2e cleanup michael parents: 2197 diff changeset	947 int y;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	948 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	949 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	950 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	951 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	952
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	953 for(y=-4; y<height; y+=2){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	954 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	955 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	956
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	957 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	958 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	959 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	960 if(width>400){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	961 STOP_TIMER("horizontal_decompose97i")
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	962 }}
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	963
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	964 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	965 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	966 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	967 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	968 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	969
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	970 if(width>400){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	971 STOP_TIMER("vertical_decompose97i")
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	972 }}
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	973
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	974 b0=b2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	975 b1=b3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	976 b2=b4;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	977 b3=b5;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	978 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	979 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	980
2241 c26038875ebc consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>) michael parents: 2240 diff changeset	981 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	982 int level;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	983
2164 cbac56a6244f cleanup michael parents: 2161 diff changeset	984 for(level=0; level<decomposition_count; level++){
cbac56a6244f cleanup michael parents: 2161 diff changeset	985 switch(type){
3326 fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	986 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	987 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	988 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	989 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	990 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	991
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	992 static void horizontal_compose53i(IDWTELEM *b, int width){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	993 IDWTELEM temp[width];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	994 const int width2= width>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	995 const int w2= (width+1)>>1;
2893 6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	996 int x;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	997
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	998 #if 0
2893 6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	999 int A1,A2,A3,A4;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1000 A2= temp[1 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1001 A4= temp[0 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1002 A1= temp[0+width2];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1003 A1 -= (A2 + A4)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1004 A4 += (A1 + 1)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1005 b[0+width2] = A1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1006 b[0 ] = A4;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1007 for(x=1; x+1<width2; x+=2){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1008 A3= temp[x+width2];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1009 A4= temp[x+1 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1010 A3 -= (A2 + A4)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1011 A2 += (A1 + A3 + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1012 b[x+width2] = A3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1013 b[x ] = A2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1014
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1015 A1= temp[x+1+width2];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1016 A2= temp[x+2 ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1017 A1 -= (A2 + A4)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1018 A4 += (A1 + A3 + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1019 b[x+1+width2] = A1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1020 b[x+1 ] = A4;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1021 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1022 A3= temp[width-1];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1023 A3 -= A2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1024 A2 += (A1 + A3 + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1025 b[width -1] = A3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1026 b[width2-1] = A2;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1027 #else
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1028 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1029 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1030 #endif
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1031 for(x=0; x<width2; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1032 b[2*x ]= temp[x ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1033 b[2*x + 1]= temp[x+w2];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1034 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1035 if(width&1)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1036 b[2*x ]= temp[x ];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1037 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1038
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1039 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1040 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1041
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1042 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1043 b1[i] += (b0[i] + b2[i])>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1044 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1045 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1046
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1047 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1048 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1049
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1050 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1051 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1052 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1053 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1054
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1055 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1056 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1057 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1058 cs->y = -1;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1059 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1060
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1061 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1062 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1063 cs->b1 = buffer + mirror(-1 , height-1)*stride;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1064 cs->y = -1;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1065 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1066
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1067 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1068 int y= cs->y;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1069
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1070 IDWTELEM *b0= cs->b0;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1071 IDWTELEM *b1= cs->b1;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1072 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1073 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1074
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1075 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1076 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1077 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1078 STOP_TIMER("vertical_compose53i*")}
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1079
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1080 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1081 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1082 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1083 STOP_TIMER("horizontal_compose53i")}
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1084
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1085 cs->b0 = b2;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1086 cs->b1 = b3;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1087 cs->y += 2;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1088 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1089
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1090 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1091 int y= cs->y;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1092 IDWTELEM *b0= cs->b0;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1093 IDWTELEM *b1= cs->b1;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1094 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1095 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1096
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1097 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1098 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1099 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1100 STOP_TIMER("vertical_compose53i*")}
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1101
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1102 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1103 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1104 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1105 STOP_TIMER("horizontal_compose53i")}
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1106
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1107 cs->b0 = b2;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1108 cs->b1 = b3;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1109 cs->y += 2;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1110 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1111
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1112 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1113 dwt_compose_t cs;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1114 spatial_compose53i_init(&cs, buffer, height, stride);
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1115 while(cs.y <= height)
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1116 spatial_compose53i_dy(&cs, buffer, width, height, stride);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1117 }
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1118
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1119
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1120 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1121 IDWTELEM temp[width];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1122 const int w2= (width+1)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1123
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1124 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
5589 946c2db0a093 cleanup (remove some old experimentation related code) michael parents: 5588 diff changeset	1125 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1126 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1127 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1128 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1129
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1130 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1131 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1132
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1133 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1134 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1135 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1136 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1137
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1138 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1139 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1140
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1141 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1142 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1143 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1144 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1145
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1146 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1147 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1148
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1149 for(i=0; i<width; i++){
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1150 #ifdef liftS
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1151 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1152 #else
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1153 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1154 #endif
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1155 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1156 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1157
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1158 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1159 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1160
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1161 for(i=0; i<width; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1162 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1163 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1164 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1165
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1166 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
2592 b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1167 int i;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1168
2592 b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1169 for(i=0; i<width; i++){
b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1170 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1171 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1172 #ifdef liftS
2592 b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1173 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
2602 5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1174 #else
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1175 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
5ec55feb6fdd rescale coefficients during IDWT, that way the lifting steps are much simpler and faster michael parents: 2601 diff changeset	1176 #endif
2592 b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1177 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1178 }
b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1179 }
b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1180
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1181 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1182 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1183 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1184 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1185 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1186 cs->y = -3;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1187 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1188
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1189 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1190 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1191 cs->b1 = buffer + mirror(-3 , height-1)*stride;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1192 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1193 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1194 cs->y = -3;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1195 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1196
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	1197 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1198 int y = cs->y;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1199
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1200 IDWTELEM *b0= cs->b0;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1201 IDWTELEM *b1= cs->b1;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1202 IDWTELEM *b2= cs->b2;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1203 IDWTELEM *b3= cs->b3;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1204 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1205 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1206
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1207 {START_TIMER
2592 b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1208 if(y>0 && y+4<height){
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	1209 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
2592 b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1210 }else{
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1211 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1212 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1213 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1214 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
2592 b70b4b69960b merge vertical lifting steps and a little cleanup michael parents: 2589 diff changeset	1215 }
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1216 if(width>400){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1217 STOP_TIMER("vertical_compose97i")}}
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1218
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1219 {START_TIMER
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	1220 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	1221 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
3012 088920c095fc cleanup michael parents: 3000 diff changeset	1222 if(width>400 && y+0<(unsigned)height){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1223 STOP_TIMER("horizontal_compose97i")}}
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1224
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1225 cs->b0=b2;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1226 cs->b1=b3;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1227 cs->b2=b4;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1228 cs->b3=b5;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1229 cs->y += 2;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1230 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1231
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1232 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1233 int y = cs->y;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1234 IDWTELEM *b0= cs->b0;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1235 IDWTELEM *b1= cs->b1;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1236 IDWTELEM *b2= cs->b2;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1237 IDWTELEM *b3= cs->b3;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1238 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1239 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1240
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1241 {START_TIMER
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1242 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1243 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1244 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	1245 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1246 if(width>400){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1247 STOP_TIMER("vertical_compose97i")}}
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1248
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1249 {START_TIMER
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	1250 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	1251 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1252 if(width>400 && b0 <= b2){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1253 STOP_TIMER("horizontal_compose97i")}}
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1254
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1255 cs->b0=b2;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1256 cs->b1=b3;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1257 cs->b2=b4;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1258 cs->b3=b5;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1259 cs->y += 2;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1260 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1261
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1262 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1263 dwt_compose_t cs;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1264 spatial_compose97i_init(&cs, buffer, height, stride);
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1265 while(cs.y <= height)
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1266 spatial_compose97i_dy(&cs, buffer, width, height, stride);
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1267 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1268
3075 961af1358c7f add static keyword to some functions mru parents: 3063 diff changeset	1269 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1270 int level;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1271 for(level=decomposition_count-1; level>=0; level--){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1272 switch(type){
3326 fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1273 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1274 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1275 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1276 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1277 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1278
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1279 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1280 int level;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1281 for(level=decomposition_count-1; level>=0; level--){
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1282 switch(type){
3326 fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1283 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1284 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1285 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1286 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1287 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1288
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1289 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1290 const int support = type==1 ? 3 : 5;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1291 int level;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1292 if(type==2) return;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1293
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1294 for(level=decomposition_count-1; level>=0; level--){
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1295 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1296 switch(type){
3326 fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1297 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1298 break;
3326 fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1299 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1300 break;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1301 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1302 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1303 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1304 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1305
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	1306 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1307 const int support = type==1 ? 3 : 5;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1308 int level;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1309 if(type==2) return;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1310
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1311 for(level=decomposition_count-1; level>=0; level--){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1312 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1313 switch(type){
3326 fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1314 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1315 break;
3326 fb245e797c5d Snow: cosmetics lorenm parents: 3325 diff changeset	1316 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1317 break;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1318 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1319 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1320 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1321 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1322
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1323 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1324 dwt_compose_t cs[MAX_DECOMPOSITIONS];
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1325 int y;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1326 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1327 for(y=0; y<height; y+=4)
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	1328 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1329 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1330
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1331 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1332 const int w= b->width;
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1333 const int h= b->height;
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1334 int x, y;
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1335
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1336 if(1){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1337 int run=0;
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1338 int runs[w*h];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1339 int run_index=0;
2609 0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1340 int max_index;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1341
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1342 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1343 for(x=0; x<w; x++){
2148 678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1344 int v, p=0;
2144 e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1345 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1346 v= src[x + y*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1347
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1348 if(y){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1349 t= src[x + (y-1)*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1350 if(x){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1351 lt= src[x - 1 + (y-1)*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1352 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1353 if(x + 1 < w){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1354 rt= src[x + 1 + (y-1)*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1355 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1356 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1357 if(x){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1358 l= src[x - 1 + y*stride];
2144 e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1359 /*if(x > 1){
e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1360 if(orientation==1) ll= src[y + (x-2)*stride];
e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1361 else ll= src[x - 2 + y*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1362 }*/
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1363 }
2148 678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1364 if(parent){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1365 int px= x>>1;
48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1366 int py= y>>1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1367 if(px<b->parent->width && py<b->parent->height)
2148 678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1368 p= parent[px + py*2*stride];
678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1369 }
678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1370 if(!(/*ll|*/l|lt|t|rt|p)){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1371 if(v){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1372 runs[run_index++]= run;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1373 run=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1374 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1375 run++;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1376 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1377 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1378 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1379 }
2609 0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1380 max_index= run_index;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1381 runs[run_index++]= run;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1382 run_index=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1383 run= runs[run_index++];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1384
2609 0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1385 put_symbol2(&s->c, b->state[30], max_index, 0);
0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1386 if(run_index <= max_index)
0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1387 put_symbol2(&s->c, b->state[1], run, 3);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1388
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1389 for(y=0; y<h; y++){
2435 c89ac0e70c66 10l patch by (matthieu castet <castet.matthieu free fr>) michael parents: 2422 diff changeset	1390 if(s->c.bytestream_end - s->c.bytestream < w*40){
2422 18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	1391 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	1392 return -1;
18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	1393 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1394 for(x=0; x<w; x++){
2148 678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1395 int v, p=0;
2144 e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1396 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1397 v= src[x + y*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1398
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1399 if(y){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1400 t= src[x + (y-1)*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1401 if(x){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1402 lt= src[x - 1 + (y-1)*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1403 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1404 if(x + 1 < w){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1405 rt= src[x + 1 + (y-1)*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1406 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1407 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1408 if(x){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1409 l= src[x - 1 + y*stride];
2144 e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1410 /*if(x > 1){
e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1411 if(orientation==1) ll= src[y + (x-2)*stride];
e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1412 else ll= src[x - 2 + y*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1413 }*/
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1414 }
2148 678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1415 if(parent){
2149 48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1416 int px= x>>1;
48dc4ec06e88 cleanup michael parents: 2148 diff changeset	1417 int py= y>>1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1418 if(px<b->parent->width && py<b->parent->height)
2148 678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1419 p= parent[px + py*2*stride];
678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1420 }
678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1421 if(/*ll|*/l|lt|t|rt|p){
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1422 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
2144 e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1423
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1424 put_rac(&s->c, &b->state[0][context], !!v);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1425 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1426 if(!run){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1427 run= runs[run_index++];
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1428
2609 0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1429 if(run_index <= max_index)
0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1430 put_symbol2(&s->c, b->state[1], run, 3);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1431 assert(v);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1432 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1433 run--;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1434 assert(!v);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1435 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1436 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1437 if(v){
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1438 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1439 int l2= 2*FFABS(l) + (l<0);
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1440 int t2= 2*FFABS(t) + (t<0);
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1441
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1442 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
2596 3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	1443 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1444 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1445 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1446 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1447 }
2422 18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	1448 return 0;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1449 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1450
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1451 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1452 // encode_subband_qtree(s, b, src, parent, stride, orientation);
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1453 // encode_subband_z0run(s, b, src, parent, stride, orientation);
2422 18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	1454 return encode_subband_c0run(s, b, src, parent, stride, orientation);
2155 274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1455 // encode_subband_dzr(s, b, src, parent, stride, orientation);
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1456 }
274a01d80f4a various subband encoders (all either worse or complicated so they are commented out) michael parents: 2152 diff changeset	1457
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1458 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1459 const int w= b->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1460 const int h= b->height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1461 int x,y;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1462
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1463 if(1){
2609 0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1464 int run, runs;
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1465 x_and_coeff *xc= b->x_coeff;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1466 x_and_coeff *prev_xc= NULL;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1467 x_and_coeff *prev2_xc= xc;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1468 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1469 x_and_coeff *prev_parent_xc= parent_xc;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1470
2609 0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1471 runs= get_symbol2(&s->c, b->state[30], 0);
0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1472 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1473 else run= INT_MAX;
0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1474
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1475 for(y=0; y<h; y++){
2193 ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1476 int v=0;
ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1477 int lt=0, t=0, rt=0;
ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1478
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1479 if(y && prev_xc->x == 0){
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1480 rt= prev_xc->coeff;
2193 ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1481 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1482 for(x=0; x<w; x++){
2193 ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1483 int p=0;
ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1484 const int l= v;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1485
2193 ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1486 lt= t; t= rt;
ea7715935e55 5% faster decode_subband() michael parents: 2192 diff changeset	1487
2194 d29037435955 11% faster decode_subband() michael parents: 2193 diff changeset	1488 if(y){
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1489 if(prev_xc->x <= x)
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1490 prev_xc++;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1491 if(prev_xc->x == x + 1)
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1492 rt= prev_xc->coeff;
2194 d29037435955 11% faster decode_subband() michael parents: 2193 diff changeset	1493 else
d29037435955 11% faster decode_subband() michael parents: 2193 diff changeset	1494 rt=0;
d29037435955 11% faster decode_subband() michael parents: 2193 diff changeset	1495 }
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1496 if(parent_xc){
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1497 if(x>>1 > parent_xc->x){
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1498 parent_xc++;
2192 77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	1499 }
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1500 if(x>>1 == parent_xc->x){
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1501 p= parent_xc->coeff;
2194 d29037435955 11% faster decode_subband() michael parents: 2193 diff changeset	1502 }
2148 678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1503 }
678be5a8f282 use parent sample to predict significance & magnitude michael parents: 2146 diff changeset	1504 if(/ll\|/l\|lt\|t\|rt\|p){
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1505 int context= av_log2(/FFABS(ll) + /3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
2144 e33371c1e2b4 better context model (0.2-1% lower bitrate) michael parents: 2139 diff changeset	1506
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1507 v=get_rac(&s->c, &b->state[0][context]);
2605 39ef4c5454f0 optimizing unpack_coeffs() michael parents: 2604 diff changeset	1508 if(v){
39ef4c5454f0 optimizing unpack_coeffs() michael parents: 2604 diff changeset	1509 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
39ef4c5454f0 optimizing unpack_coeffs() michael parents: 2604 diff changeset	1510 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1511
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1512 xc->x=x;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1513 (xc++)->coeff= v;
2605 39ef4c5454f0 optimizing unpack_coeffs() michael parents: 2604 diff changeset	1514 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1515 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1516 if(!run){
2609 0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1517 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
0f74a379a890 store the number of runs to avoid storing the last run value michael parents: 2608 diff changeset	1518 else run= INT_MAX;
2605 39ef4c5454f0 optimizing unpack_coeffs() michael parents: 2604 diff changeset	1519 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
39ef4c5454f0 optimizing unpack_coeffs() michael parents: 2604 diff changeset	1520 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1521
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1522 xc->x=x;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1523 (xc++)->coeff= v;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1524 }else{
2606 2649aeaadc44 minor optimization michael parents: 2605 diff changeset	1525 int max_run;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1526 run--;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1527 v=0;
2191 44afbcec70b8 50% faster decode_subband() michael parents: 2189 diff changeset	1528
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1529 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
2606 2649aeaadc44 minor optimization michael parents: 2605 diff changeset	1530 else max_run= FFMIN(run, w-x-1);
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1531 if(parent_xc)
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1532 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
2606 2649aeaadc44 minor optimization michael parents: 2605 diff changeset	1533 x+= max_run;
2649aeaadc44 minor optimization michael parents: 2605 diff changeset	1534 run-= max_run;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1535 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1536 }
2192 77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	1537 }
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1538 (xc++)->x= w+1; //end marker
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1539 prev_xc= prev2_xc;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1540 prev2_xc= xc;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1541
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1542 if(parent_xc){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1543 if(y&1){
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1544 while(parent_xc->x != parent->width+1)
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1545 parent_xc++;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1546 parent_xc++;
fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1547 prev_parent_xc= parent_xc;
2192 77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	1548 }else{
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1549 parent_xc= prev_parent_xc;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1550 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1551 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1552 }
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1553
2607 fde7b6fe2aaf replace complicated pointer dereference + index stuff by pointers in unpack_coeffs() michael parents: 2606 diff changeset	1554 (xc++)->x= w+1; //end marker
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1555 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1556 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1557
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1558 static inline void decode_subband_slice_buffered(SnowContext s, SubBand b, slice_buffer * sb, int start_y, int h, int save_state[1]){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1559 const int w= b->width;
2893 6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	1560 int y;
4594 a96d905dcbaa Add av_ prefix to clip functions reimar parents: 4588 diff changeset	1561 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
2600 2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	1562 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1563 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1564 int new_index = 0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1565
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1566 START_TIMER
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1567
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1568 if(b->ibuf == s->spatial_idwt_buffer \|\| s->qlog == LOSSLESS_QLOG){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1569 qadd= 0;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1570 qmul= 1<<QEXPSHIFT;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1571 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1572
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1573 /* If we are on the second or later slice, restore our index. */
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1574 if (start_y != 0)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1575 new_index = save_state[0];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1576
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1577
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1578 for(y=start_y; y<h; y++){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1579 int x = 0;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1580 int v;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1581 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	1582 memset(line, 0, b->width*sizeof(IDWTELEM));
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1583 v = b->x_coeff[new_index].coeff;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1584 x = b->x_coeff[new_index++].x;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1585 while(x < w)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1586 {
2596 3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	1587 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	1588 register int u= -(v&1);
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	1589 line[x] = (t^u) - u;
3e90a8cfddc6 10% faster unpack_coeffs michael parents: 2595 diff changeset	1590
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1591 v = b->x_coeff[new_index].coeff;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1592 x = b->x_coeff[new_index++].x;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1593 }
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1594 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1595 if(w > 200 && start_y != 0/level+1 == s->spatial_decomposition_count/){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1596 STOP_TIMER("decode_subband")
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1597 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1598
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1599 /* Save our variables for the next slice. */
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1600 save_state[0] = new_index;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1601
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	1602 return;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1603 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1604
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	1605 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1606 int plane_index, level, orientation;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1607
2199 e0b08bdf565d 10l (keyframes and context resets) michael parents: 2198 diff changeset	1608 for(plane_index=0; plane_index<3; plane_index++){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1609 for(level=0; level<s->spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1610 for(orientation=level ? 1:0; orientation<4; orientation++){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1611 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1612 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1613 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	1614 }
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1615 memset(s->header_state, MID_STATE, sizeof(s->header_state));
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1616 memset(s->block_state, MID_STATE, sizeof(s->block_state));
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1617 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1618
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1619 static int alloc_blocks(SnowContext *s){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1620 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1621 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1622
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1623 s->b_width = w;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1624 s->b_height= h;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1625
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1626 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1627 return 0;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1628 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1629
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1630 static inline void copy_rac_state(RangeCoder d, RangeCoder s){
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1631 uint8_t *bytestream= d->bytestream;
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1632 uint8_t *bytestream_start= d->bytestream_start;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1633 d= s;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1634 d->bytestream= bytestream;
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1635 d->bytestream_start= bytestream_start;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1636 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1637
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1638 //near copy & paste from dsputil, FIXME
/**
 * Sum all samples of a w x w square.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance, in samples, between the starts of two rows
 * @param w         width and height of the square
 * @return the sum of the w*w samples (0 when w <= 0)
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    int total = 0;
    int row, col;

    for (row = 0; row < w; row++) {
        const uint8_t *p = pix + row * line_size;

        for (col = 0; col < w; col++)
            total += p[col];
    }
    return total;
}
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1653
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1654 //near copy & paste from dsputil, FIXME
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1655 static int pix_norm1(uint8_t * pix, int line_size, int w)
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1656 {
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1657 int s, i, j;
4179 46d38e3d7038 rename squareTbl -> ff_squareTbl mru parents: 4123 diff changeset	1658 uint32_t *sq = ff_squareTbl + 256;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1659
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1660 s = 0;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1661 for (i = 0; i < w; i++) {
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1662 for (j = 0; j < w; j ++) {
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1663 s += sq[pix[0]];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1664 pix ++;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1665 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1666 pix += line_size - w;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1667 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1668 return s;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1669 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1670
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1672 const int w= s->b_width << s->block_max_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1673 const int rem_depth= s->block_max_depth - level;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1674 const int index= (x + y*w) << rem_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1675 const int block_w= 1<<rem_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1676 BlockNode block;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1677 int i,j;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1678
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1679 block.color[0]= l;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1680 block.color[1]= cb;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1681 block.color[2]= cr;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1682 block.mx= mx;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1683 block.my= my;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1684 block.ref= ref;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1685 block.type= type;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1686 block.level= level;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1687
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1688 for(j=0; j<block_w; j++){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1689 for(i=0; i<block_w; i++){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1690 s->block[index + i + j*w]= block;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1691 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1692 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1693 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1694
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1695 static inline void init_ref(MotionEstContext c, uint8_t src[3], uint8_t ref[3], uint8_t ref2[3], int x, int y, int ref_index){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1696 const int offset[3]= {
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1697 y*c-> stride + x,
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1698 ((y*c->uvstride + x)>>1),
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1699 ((y*c->uvstride + x)>>1),
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1700 };
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1701 int i;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1702 for(i=0; i<3; i++){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1703 c->src[0][i]= src [i];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1704 c->ref[0][i]= ref [i] + offset[i];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1705 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1706 assert(!ref_index);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1707 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1708
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
4408 c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1710 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1711 if(s->ref_frames == 1){
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1712 *mx = mid_pred(left->mx, top->mx, tr->mx);
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1713 *my = mid_pred(left->my, top->my, tr->my);
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1714 }else{
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1715 const int *scale = scale_mv_ref[ref];
4407 15688bdfe1b5 Brings down the number of snow.c warnings from 33 to 27 by putting parentheses takis parents: 4360 diff changeset	1716 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
15688bdfe1b5 Brings down the number of snow.c warnings from 33 to 27 by putting parentheses takis parents: 4360 diff changeset	1717 (top ->mx * scale[top ->ref] + 128) >>8,
15688bdfe1b5 Brings down the number of snow.c warnings from 33 to 27 by putting parentheses takis parents: 4360 diff changeset	1718 (tr ->mx * scale[tr ->ref] + 128) >>8);
15688bdfe1b5 Brings down the number of snow.c warnings from 33 to 27 by putting parentheses takis parents: 4360 diff changeset	1719 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
15688bdfe1b5 Brings down the number of snow.c warnings from 33 to 27 by putting parentheses takis parents: 4360 diff changeset	1720 (top ->my * scale[top ->ref] + 128) >>8,
15688bdfe1b5 Brings down the number of snow.c warnings from 33 to 27 by putting parentheses takis parents: 4360 diff changeset	1721 (tr ->my * scale[tr ->ref] + 128) >>8);
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1722 }
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1723 }
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1724
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1725 //FIXME copy&paste
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1726 #define P_LEFT P[1]
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1727 #define P_TOP P[2]
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1728 #define P_TOPRIGHT P[3]
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1729 #define P_MEDIAN P[4]
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1730 #define P_MV1 P[9]
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1731 #define FLAG_QPEL 1 //must be 1
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1732
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1734 uint8_t p_buffer[1024];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1735 uint8_t i_buffer[1024];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1736 uint8_t p_state[sizeof(s->block_state)];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1737 uint8_t i_state[sizeof(s->block_state)];
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1738 RangeCoder pc, ic;
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1739 uint8_t *pbbak= s->c.bytestream;
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1740 uint8_t *pbbak_start= s->c.bytestream_start;
5082 176ac8353f48 fix assertion failure michael parents: 4962 diff changeset	1741 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1742 const int w= s->b_width << s->block_max_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1743 const int h= s->b_height << s->block_max_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1744 const int rem_depth= s->block_max_depth - level;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1745 const int index= (x + y*w) << rem_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1746 const int block_w= 1<<(LOG2_MB_SIZE - level);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1747 int trx= (x+1)<<rem_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1748 int try= (y+1)<<rem_depth;
4408 c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1749 const BlockNode *left = x ? &s->block[index-1] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1750 const BlockNode *top = y ? &s->block[index-w] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1751 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1752 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1753 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1754 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1755 int pl = left->color[0];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1756 int pcb= left->color[1];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1757 int pcr= left->color[2];
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1758 int pmx, pmy;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1759 int mx=0, my=0;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1760 int l,cr,cb;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1761 const int stride= s->current_picture.linesize[0];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1762 const int uvstride= s->current_picture.linesize[1];
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1763 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1764 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1765 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1766 int P[10][2];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1767 int16_t last_mv[3][2];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1768 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1769 const int shift= 1+qpel;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1770 MotionEstContext *c= &s->m.me;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1771 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1772 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1773 int my_context= av_log2(2*FFABS(left->my - top->my));
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1774 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1775 int ref, best_ref, ref_score, ref_mx, ref_my;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1776
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1777 assert(sizeof(s->block_state) >= 256);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1778 if(s->keyframe){
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1779 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1780 return 0;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1781 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1782
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1783 // clip predictors / edge ?
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1784
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1785 P_LEFT[0]= left->mx;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1786 P_LEFT[1]= left->my;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1787 P_TOP [0]= top->mx;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1788 P_TOP [1]= top->my;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1789 P_TOPRIGHT[0]= tr->mx;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1790 P_TOPRIGHT[1]= tr->my;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1791
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1792 last_mv[0][0]= s->block[index].mx;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1793 last_mv[0][1]= s->block[index].my;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1794 last_mv[1][0]= right->mx;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1795 last_mv[1][1]= right->my;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1796 last_mv[2][0]= bottom->mx;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1797 last_mv[2][1]= bottom->my;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1798
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1799 s->m.mb_stride=2;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1800 s->m.mb_x=
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1801 s->m.mb_y= 0;
4360 5f887a3c7281 Change the Snow encoder to always use the available MotionEstContext pointer. takis parents: 4332 diff changeset	1802 c->skip= 0;
5f887a3c7281 Change the Snow encoder to always use the available MotionEstContext pointer. takis parents: 4332 diff changeset	1803
5f887a3c7281 Change the Snow encoder to always use the available MotionEstContext pointer. takis parents: 4332 diff changeset	1804 assert(c-> stride == stride);
5f887a3c7281 Change the Snow encoder to always use the available MotionEstContext pointer. takis parents: 4332 diff changeset	1805 assert(c->uvstride == uvstride);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1806
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1807 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1808 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1809 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1810 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1811
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	1812 c->xmin = - x*block_w - 16+2;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	1813 c->ymin = - y*block_w - 16+2;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	1814 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	1815 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1816
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1817 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1818 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1819 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1820 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1821 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1822 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1823 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1824
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1825 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1826 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1827
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1828 if (!y) {
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1829 c->pred_x= P_LEFT[0];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1830 c->pred_y= P_LEFT[1];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1831 } else {
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1832 c->pred_x = P_MEDIAN[0];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1833 c->pred_y = P_MEDIAN[1];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1834 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1835
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1836 score= INT_MAX;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1837 best_ref= 0;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1838 for(ref=0; ref<s->ref_frames; ref++){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1839 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1840
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1841 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1842 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1843
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1844 assert(ref_mx >= c->xmin);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1845 assert(ref_mx <= c->xmax);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1846 assert(ref_my >= c->ymin);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1847 assert(ref_my <= c->ymax);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1848
4360 5f887a3c7281 Change the Snow encoder to always use the available MotionEstContext pointer. takis parents: 4332 diff changeset	1849 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1850 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1851 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1852 if(s->ref_mvs[ref]){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1853 s->ref_mvs[ref][index][0]= ref_mx;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1854 s->ref_mvs[ref][index][1]= ref_my;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1855 s->ref_scores[ref][index]= ref_score;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1856 }
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1857 if(score > ref_score){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1858 score= ref_score;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1859 best_ref= ref;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1860 mx= ref_mx;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1861 my= ref_my;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1862 }
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1863 }
5127 4dbe6578f811 misc spelling fixes diego parents: 5085 diff changeset	1864 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1865
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1866 // subpel search
5085 81bbca75ae2f fix assert() 2nd try michael parents: 5082 diff changeset	1867 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1868 pc= s->c;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1869 pc.bytestream_start=
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1870 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1871 memcpy(p_state, s->block_state, sizeof(s->block_state));
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1872
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1873 if(level!=s->block_max_depth)
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1874 put_rac(&pc, &p_state[4 + s_context], 1);
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1875 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1876 if(s->ref_frames > 1)
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1877 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1878 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1879 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1880 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1881 p_len= pc.bytestream - pc.bytestream_start;
5082 176ac8353f48 fix assertion failure michael parents: 4962 diff changeset	1882 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1883
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1884 block_s= block_w*block_w;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1885 sum = pix_sum(current_data[0], stride, block_w);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1886 l= (sum + block_s/2)/block_s;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1887 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1888
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1889 block_s= block_w*block_w>>2;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1890 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1891 cb= (sum + block_s/2)/block_s;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1892 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1893 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1894 cr= (sum + block_s/2)/block_s;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1895 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1896
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1897 ic= s->c;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1898 ic.bytestream_start=
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1899 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1900 memcpy(i_state, s->block_state, sizeof(s->block_state));
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1901 if(level!=s->block_max_depth)
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1902 put_rac(&ic, &i_state[4 + s_context], 1);
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1903 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1904 put_symbol(&ic, &i_state[32], l-pl , 1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1905 put_symbol(&ic, &i_state[64], cb-pcb, 1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1906 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1907 i_len= ic.bytestream - ic.bytestream_start;
5082 176ac8353f48 fix assertion failure michael parents: 4962 diff changeset	1908 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1909
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1910 // assert(score==256*256*256*64-1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1911 assert(iscore < 255*255*256 + s->lambda2*10);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1912 assert(iscore >= 0);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1913 assert(l>=0 && l<=255);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1914 assert(pl>=0 && pl<=255);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1915
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1916 if(level==0){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1917 int varc= iscore >> 8;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1918 int vard= score >> 8;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1919 if (vard <= 64 || vard < varc)
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1920 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1921 else
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1922 c->scene_change_score+= s->m.qscale;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1923 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1924
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1925 if(level!=s->block_max_depth){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1926 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1927 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1929 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1931 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1932
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1933 if(score2 < score && score2 < iscore)
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1934 return score2;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1935 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	1936
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1937 if(iscore < score){
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1938 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1939 memcpy(pbbak, i_buffer, i_len);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1940 s->c= ic;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1941 s->c.bytestream_start= pbbak_start;
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1942 s->c.bytestream= pbbak + i_len;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1943 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1944 memcpy(s->block_state, i_state, sizeof(s->block_state));
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1945 return iscore;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1946 }else{
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1947 memcpy(pbbak, p_buffer, p_len);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1948 s->c= pc;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1949 s->c.bytestream_start= pbbak_start;
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	1950 s->c.bytestream= pbbak + p_len;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1951 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1952 memcpy(s->block_state, p_state, sizeof(s->block_state));
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1953 return score;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1954 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1955 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	1956
4283 d6f83e2f8804 rename always_inline to av_always_inline and move to common.h mru parents: 4197 diff changeset	1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1958 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1959 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1960 }else{
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1961 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1962 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1963 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1964
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1966 const int w= s->b_width << s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1967 const int rem_depth= s->block_max_depth - level;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1968 const int index= (x + y*w) << rem_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1969 int trx= (x+1)<<rem_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1970 BlockNode *b= &s->block[index];
4408 c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1971 const BlockNode *left = x ? &s->block[index-1] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1972 const BlockNode *top = y ? &s->block[index-w] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1973 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	1974 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1975 int pl = left->color[0];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1976 int pcb= left->color[1];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1977 int pcr= left->color[2];
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1978 int pmx, pmy;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	1979 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1980 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	1981 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1982 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1983
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1984 if(s->keyframe){
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	1985 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1986 return;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1987 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1988
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1989 if(level!=s->block_max_depth){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1990 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2995 dfc271b90fe6 4mv + iter ME 10l fix (still not bugfree but better) michael parents: 2994 diff changeset	1991 put_rac(&s->c, &s->block_state[4 + s_context], 1);
dfc271b90fe6 4mv + iter ME 10l fix (still not bugfree but better) michael parents: 2994 diff changeset	1992 }else{
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1993 put_rac(&s->c, &s->block_state[4 + s_context], 0);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1998 return;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	1999 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2000 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2001 if(b->type & BLOCK_INTRA){
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2002 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2003 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2004 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2005 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2006 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2007 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2008 }else{
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2009 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2010 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2011 if(s->ref_frames > 1)
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2012 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2013 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2014 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2015 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2016 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2017 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2018
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2020 const int w= s->b_width << s->block_max_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2021 const int rem_depth= s->block_max_depth - level;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2022 const int index= (x + y*w) << rem_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2023 int trx= (x+1)<<rem_depth;
4408 c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2024 const BlockNode *left = x ? &s->block[index-1] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2025 const BlockNode *top = y ? &s->block[index-w] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2028 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2029
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2030 if(s->keyframe){
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2031 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2032 return;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2033 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2034
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	2035 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
4332 35812973e0bc dead code removial michael parents: 4331 diff changeset	2036 int type, mx, my;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2037 int l = left->color[0];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2038 int cb= left->color[1];
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2039 int cr= left->color[2];
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2040 int ref = 0;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2041 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2042 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2043 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2044
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	2045 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2046
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2047 if(type){
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2048 pred_mv(s, &mx, &my, 0, left, top, tr);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2049 l += get_symbol(&s->c, &s->block_state[32], 1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2050 cb+= get_symbol(&s->c, &s->block_state[64], 1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2051 cr+= get_symbol(&s->c, &s->block_state[96], 1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2052 }else{
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2053 if(s->ref_frames > 1)
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2054 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2055 pred_mv(s, &mx, &my, ref, left, top, tr);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2056 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2057 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2058 }
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2059 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2060 }else{
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2061 decode_q_branch(s, level+1, 2x+0, 2y+0);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2062 decode_q_branch(s, level+1, 2x+1, 2y+0);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2063 decode_q_branch(s, level+1, 2x+0, 2y+1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2064 decode_q_branch(s, level+1, 2x+1, 2y+1);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2065 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2066 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2067
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	2068 static void encode_blocks(SnowContext *s, int search){
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2069 int x, y;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2070 int w= s->b_width;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2071 int h= s->b_height;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2072
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	2073 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2074 iterative_me(s);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2075
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2076 for(y=0; y<h; y++){
2435 c89ac0e70c66 10l patch by (matthieu castet <castet.matthieu free fr>) michael parents: 2422 diff changeset	2077 if(s->c.bytestream_end - s->c.bytestream < wMB_SIZEMB_SIZE*3){ //FIXME nicer limit
2422 18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	2078 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	2079 return;
18b8b2dcc037 various security fixes and precautionary checks michael parents: 2408 diff changeset	2080 }
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2081 for(x=0; x<w; x++){
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	2082 if(s->avctx->me_method == ME_ITER \|\| !search)
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2083 encode_q_branch2(s, 0, x, y);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2084 else
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2085 encode_q_branch (s, 0, x, y);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2086 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2087 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2088 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2089
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2090 static void decode_blocks(SnowContext *s){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2091 int x, y;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2092 int w= s->b_width;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2093 int h= s->b_height;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2094
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2095 for(y=0; y<h; y++){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2096 for(x=0; x<w; x++){
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2097 decode_q_branch(s, 0, x, y);
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2098 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2099 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2100 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2101
5254 727a49c28c51 trivial warning fixes mru parents: 5224 diff changeset	2102 static void mc_block(uint8_t dst, const uint8_t src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2103 int x, y;
2221 3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2104 START_TIMER
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2105 for(y=0; y < b_h+5; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2106 for(x=0; x < b_w; x++){
2221 3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2107 int a0= src[x ];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2108 int a1= src[x + 1];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2109 int a2= src[x + 2];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2110 int a3= src[x + 3];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2111 int a4= src[x + 4];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2112 int a5= src[x + 5];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2113 // int am= 9*(a1+a2) - (a0+a3);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2114 int am= 20(a2+a3) - 5(a1+a4) + (a0+a5);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2115 // int am= 18(a2+a3) - 2(a1+a4);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2116 // int aL= (-7a0 + 105a1 + 35a2 - 5a3)>>3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2117 // int aR= (-7a3 + 105a2 + 35a1 - 5a0)>>3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2118
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2119 // if(b_w==16) am= 8*(a1+a2);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2120
2610 f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2121 if(dx<8) am = (32a2( 8-dx) + am* dx + 128)>>8;
f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2122 else am = ( am(16-dx) + 32a3*(dx-8) + 128)>>8;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2123
2610 f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2124 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2125 if(am&(~255)) am= ~(am>>31);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2126
2610 f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2127 tmp[x] = am;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2128
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2129 /* if (dx< 4) tmp[x + ystride]= (16a1( 4-dx) + aL dx + 32)>>6;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2130 else if(dx< 8) tmp[x + ystride]= ( aL( 8-dx) + am*(dx- 4) + 32)>>6;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2131 else if(dx<12) tmp[x + ystride]= ( am(12-dx) + aR*(dx- 8) + 32)>>6;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2132 else tmp[x + ystride]= ( aR(16-dx) + 16a2(dx-12) + 32)>>6;*/
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2133 }
2221 3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2134 tmp += stride;
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2135 src += stride;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2136 }
2221 3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2137 tmp -= (b_h+5)*stride;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2138
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2139 for(y=0; y < b_h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2140 for(x=0; x < b_w; x++){
2221 3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2141 int a0= tmp[x + 0*stride];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2142 int a1= tmp[x + 1*stride];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2143 int a2= tmp[x + 2*stride];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2144 int a3= tmp[x + 3*stride];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2145 int a4= tmp[x + 4*stride];
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2146 int a5= tmp[x + 5*stride];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2147 int am= 20(a2+a3) - 5(a1+a4) + (a0+a5);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2148 // int am= 18(a2+a3) - 2(a1+a4);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2149 /* int aL= (-7a0 + 105a1 + 35a2 - 5a3)>>3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2150 int aR= (-7a3 + 105a2 + 35a1 - 5a0)>>3;*/
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2151
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2152 // if(b_w==16) am= 8*(a1+a2);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2153
2610 f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2154 if(dy<8) am = (32a2( 8-dy) + am* dy + 128)>>8;
f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2155 else am = ( am(16-dy) + 32a3*(dy-8) + 128)>>8;
f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2156
f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2157 if(am&(~255)) am= ~(am>>31);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2158
2610 f794026f4551 fix overflow in mc_block() patch by (Yartrebo )yartrebo earthlink net michael parents: 2609 diff changeset	2159 dst[x] = am;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2160 /* if (dy< 4) tmp[x + ystride]= (16a1( 4-dy) + aL dy + 32)>>6;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2161 else if(dy< 8) tmp[x + ystride]= ( aL( 8-dy) + am*(dy- 4) + 32)>>6;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2162 else if(dy<12) tmp[x + ystride]= ( am(12-dy) + aR*(dy- 8) + 32)>>6;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2163 else tmp[x + ystride]= ( aR(16-dy) + 16a2(dy-12) + 32)>>6;*/
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2164 }
2221 3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2165 dst += stride;
3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2166 tmp += stride;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2167 }
2221 3543987dccad use pointer arithmetic in mc_block(), 25% faster michael parents: 2206 diff changeset	2168 STOP_TIMER("mc_block")
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2169 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2170
/*
 * mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h):
 * a square b_w x b_w wrapper around mc_block() with a fixed fractional
 * offset (dx,dy). src is moved back by 2 columns and 2 rows before the call
 * so that the 6-tap filter window of mc_block() is positioned around the
 * sample the caller points at. h must equal b_w (asserted).
 *
 * NOTE: the scratch buffer is a variable length array of stride*(b_w+5)
 * bytes on the stack, so stack usage grows with stride.
 */
#define mca(dx,dy,b_w)\
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
    uint8_t tmp[stride*(b_w+5)];\
    assert(h==b_w);\
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
}

mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2186
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2187 static void pred_block(SnowContext s, uint8_t dst, uint8_t tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode block, int plane_index, int w, int h){
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2188 if(block->type & BLOCK_INTRA){
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2189 int x, y;
3018 1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2190 const int color = block->color[plane_index];
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2191 const int color4= color*0x01010101;
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2192 if(b_w==32){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2193 for(y=0; y < b_h; y++){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2194 (uint32_t)&dst[0 + y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2195 (uint32_t)&dst[4 + y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2196 (uint32_t)&dst[8 + y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2197 (uint32_t)&dst[12+ y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2198 (uint32_t)&dst[16+ y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2199 (uint32_t)&dst[20+ y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2200 (uint32_t)&dst[24+ y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2201 (uint32_t)&dst[28+ y*stride]= color4;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2202 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2203 }else if(b_w==16){
3018 1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2204 for(y=0; y < b_h; y++){
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2205 (uint32_t)&dst[0 + y*stride]= color4;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2206 (uint32_t)&dst[4 + y*stride]= color4;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2207 (uint32_t)&dst[8 + y*stride]= color4;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2208 (uint32_t)&dst[12+ y*stride]= color4;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2209 }
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2210 }else if(b_w==8){
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2211 for(y=0; y < b_h; y++){
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2212 (uint32_t)&dst[0 + y*stride]= color4;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2213 (uint32_t)&dst[4 + y*stride]= color4;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2214 }
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2215 }else if(b_w==4){
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2216 for(y=0; y < b_h; y++){
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2217 (uint32_t)&dst[0 + y*stride]= color4;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2218 }
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2219 }else{
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2220 for(y=0; y < b_h; y++){
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2221 for(x=0; x < b_w; x++){
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2222 dst[x + y*stride]= color;
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2223 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2224 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2225 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2226 }else{
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2227 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2228 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2229 int mx= block->mx*scale;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2230 int my= block->my*scale;
2223 b26474e72d6d use h264 MC code if possible michael parents: 2221 diff changeset	2231 const int dx= mx&15;
b26474e72d6d use h264 MC code if possible michael parents: 2221 diff changeset	2232 const int dy= my&15;
3020 c75fb0747e74 use h264 MC functions for 2xX Xx2 blocks in snow too michael parents: 3018 diff changeset	2233 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2234 sx += (mx>>4) - 2;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2235 sy += (my>>4) - 2;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2236 src += sx + sy*stride;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2237 if( (unsigned)sx >= w - b_w - 4
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2238 \|\| (unsigned)sy >= h - b_h - 4){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2239 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2240 src= tmp + MB_SIZE;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2241 }
3189 e1cbe2635325 fix %8 != 0 w/h michael parents: 3075 diff changeset	2242 // assert(b_w == b_h \|\| 2b_w == b_h \|\| b_w == 2b_h);
e1cbe2635325 fix %8 != 0 w/h michael parents: 3075 diff changeset	2243 // assert(!(b_w&(b_w-1)));
3018 1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2244 assert(b_w>1 && b_h>1);
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2245 assert(tab_index>=0 && tab_index<4 \|\| b_w==32);
3189 e1cbe2635325 fix %8 != 0 w/h michael parents: 3075 diff changeset	2246 if((dx&3) \|\| (dy&3) \|\| !(b_w == b_h \|\| 2b_w == b_h \|\| b_w == 2b_h) \|\| (b_w&(b_w-1)))
2223 b26474e72d6d use h264 MC code if possible michael parents: 2221 diff changeset	2247 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2248 else if(b_w==32){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2249 int y;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2250 for(y=0; y<b_h; y+=16){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2251 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + ystride, src + 2 + (y+2)stride,stride);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2252 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + ystride, src + 18 + (y+2)stride,stride);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2253 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2254 }else if(b_w==b_h)
3020 c75fb0747e74 use h264 MC functions for 2xX Xx2 blocks in snow too michael parents: 3018 diff changeset	2255 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
3018 1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2256 else if(b_w==2*b_h){
3020 c75fb0747e74 use h264 MC functions for 2xX Xx2 blocks in snow too michael parents: 3018 diff changeset	2257 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
c75fb0747e74 use h264 MC functions for 2xX Xx2 blocks in snow too michael parents: 3018 diff changeset	2258 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
3018 1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2259 }else{
1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2260 assert(2*b_w==b_h);
3020 c75fb0747e74 use h264 MC functions for 2xX Xx2 blocks in snow too michael parents: 3018 diff changeset	2261 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
c75fb0747e74 use h264 MC functions for 2xX Xx2 blocks in snow too michael parents: 3018 diff changeset	2262 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_wstride,src + 2 + 2stride+b_w*stride,stride);
3018 1d22fe43aa78 use h264 MC functions for rectangular blocks too michael parents: 3017 diff changeset	2263 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2264 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2265 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2266
4436 d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2267 void ff_snow_inner_add_yblock(const uint8_t obmc, const int obmc_stride, uint8_t * block, int b_w, int b_h,
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2268 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2269 int y, x;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2270 IDWTELEM * dst;
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2271 for(y=0; y<b_h; y++){
5409 13d52b7647a0 Ahem, fix typos overlooked in last commit. diego parents: 5408 diff changeset	2272 //FIXME ugly misuse of obmc_stride
4436 d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2273 const uint8_t obmc1= obmc + yobmc_stride;
d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2274 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2275 const uint8_t obmc3= obmc1+ obmc_stride(obmc_stride>>1);
d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2276 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2277 dst = slice_buffer_get_line(sb, src_y + y);
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2278 for(x=0; x<b_w; x++){
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2279 int v= obmc1[x] * block[3][x + y*src_stride]
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2280 +obmc2[x] * block[2][x + y*src_stride]
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2281 +obmc3[x] * block[1][x + y*src_stride]
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2282 +obmc4[x] * block[0][x + y*src_stride];
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2283
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2284 v <<= 8 - LOG2_OBMC_MAX;
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2285 if(FRAC_BITS != 8){
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2286 v >>= 8 - FRAC_BITS;
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2287 }
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2288 if(add){
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2289 v += dst[x + src_x];
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2290 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2291 if(v&(~255)) v= ~(v>>31);
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2292 dst8[x + y*src_stride] = v;
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2293 }else{
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2294 dst[x + src_x] -= v;
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2295 }
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2296 }
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2297 }
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2298 }
6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	2299
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2300 //FIXME name clenup (b_w, block_w, b_width stuff)
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2301 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2302 const int b_width = s->b_width << s->block_max_depth;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2303 const int b_height= s->b_height << s->block_max_depth;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2304 const int b_stride= b_width;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2305 BlockNode *lt= &s->block[b_x + b_y*b_stride];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2306 BlockNode *rt= lt+1;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2307 BlockNode *lb= lt+b_stride;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2308 BlockNode *rb= lb+1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2309 uint8_t *block[4];
2842 55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2310 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2311 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2312 uint8_t *ptmp;
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2313 int x,y;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2314
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2315 if(b_x<0){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2316 lt= rt;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2317 lb= rb;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2318 }else if(b_x + 1 >= b_width){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2319 rt= lt;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2320 rb= lb;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2321 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2322 if(b_y<0){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2323 lt= lb;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2324 rt= rb;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2325 }else if(b_y + 1 >= b_height){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2326 lb= lt;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2327 rb= rt;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2328 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2329
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2330 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2331 obmc -= src_x;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2332 b_w += src_x;
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2333 if(!sliced && !offset_dst)
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2334 dst -= src_x;
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2335 src_x=0;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2336 }else if(src_x + b_w > w){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2337 b_w = w - src_x;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2338 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2339 if(src_y<0){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2340 obmc -= src_y*obmc_stride;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2341 b_h += src_y;
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2342 if(!sliced && !offset_dst)
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2343 dst -= src_y*dst_stride;
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2344 src_y=0;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2345 }else if(src_y + b_h> h){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2346 b_h = h - src_y;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2347 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2348
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2349 if(b_w<=0 || b_h<=0) return;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2350
2842 55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2351 assert(src_stride > 2*MB_SIZE + 5);
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2352 if(!sliced && offset_dst)
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2353 dst += src_x + src_y*dst_stride;
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2354 dst8+= src_x + src_y*src_stride;
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2355 // src += src_x + src_y*src_stride;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2356
2842 55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2357 ptmp= tmp + 3*tmp_step;
55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2358 block[0]= ptmp;
55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2359 ptmp+=tmp_step;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2360 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2361
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2362 if(same_block(lt, rt)){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2363 block[1]= block[0];
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2364 }else{
2842 55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2365 block[1]= ptmp;
55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2366 ptmp+=tmp_step;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2367 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2368 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2369
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2370 if(same_block(lt, lb)){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2371 block[2]= block[0];
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2372 }else if(same_block(rt, lb)){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2373 block[2]= block[1];
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2374 }else{
2842 55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2375 block[2]= ptmp;
55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2376 ptmp+=tmp_step;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2377 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2378 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2379
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2380 if(same_block(lt, rb) ){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2381 block[3]= block[0];
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2382 }else if(same_block(rt, rb)){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2383 block[3]= block[1];
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2384 }else if(same_block(lb, rb)){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2385 block[3]= block[2];
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2386 }else{
2842 55b83c229b88 reducing minimum supported picture size michael parents: 2658 diff changeset	2387 block[3]= ptmp;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2388 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2389 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2390 #if 0
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2391 for(y=0; y<b_h; y++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2392 for(x=0; x<b_w; x++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2393 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2394 if(add) dst[x + y*dst_stride] += v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2395 else dst[x + y*dst_stride] -= v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2396 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2397 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2398 for(y=0; y<b_h; y++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2399 uint8_t *obmc2= obmc + (obmc_stride>>1);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2400 for(x=0; x<b_w; x++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2401 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2402 if(add) dst[x + y*dst_stride] += v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2403 else dst[x + y*dst_stride] -= v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2404 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2405 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2406 for(y=0; y<b_h; y++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2407 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2408 for(x=0; x<b_w; x++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2409 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2410 if(add) dst[x + y*dst_stride] += v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2411 else dst[x + y*dst_stride] -= v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2412 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2413 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2414 for(y=0; y<b_h; y++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2415 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2416 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2417 for(x=0; x<b_w; x++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2418 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2419 if(add) dst[x + y*dst_stride] += v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2420 else dst[x + y*dst_stride] -= v;
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2421 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2422 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2423 #else
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2424 if(sliced){
fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2425 START_TIMER
fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2426
fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2427 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2428 STOP_TIMER("inner_add_yblock")
fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2429 }else
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2430 for(y=0; y<b_h; y++){
5409 13d52b7647a0 Ahem, fix typos overlooked in last commit. diego parents: 5408 diff changeset	2431 //FIXME ugly misuse of obmc_stride
4436 d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2432 const uint8_t *obmc1= obmc + y*obmc_stride;
d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2433 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2434 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
d3e389536b0a Add the const specifier as needed to reduce the number of warnings. takis parents: 4409 diff changeset	2435 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2436 for(x=0; x<b_w; x++){
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2437 int v= obmc1[x] * block[3][x + y*src_stride]
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2438 +obmc2[x] * block[2][x + y*src_stride]
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2439 +obmc3[x] * block[1][x + y*src_stride]
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2440 +obmc4[x] * block[0][x + y*src_stride];
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2441
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2442 v <<= 8 - LOG2_OBMC_MAX;
2246 3414ac0b8c55 8 -> FRAC_BITS michael parents: 2241 diff changeset	2443 if(FRAC_BITS != 8){
3414ac0b8c55 8 -> FRAC_BITS michael parents: 2241 diff changeset	2444 v >>= 8 - FRAC_BITS;
3414ac0b8c55 8 -> FRAC_BITS michael parents: 2241 diff changeset	2445 }
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2446 if(add){
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2447 v += dst[x + y*dst_stride];
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2448 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2449 if(v&(~255)) v= ~(v>>31);
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2450 dst8[x + y*src_stride] = v;
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2451 }else{
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2452 dst[x + y*dst_stride] -= v;
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2453 }
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2454 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2455 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2456 #endif
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2457 }
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2458
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2459 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2460 Plane *p= &s->plane[plane_index];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2461 const int mb_w= s->b_width << s->block_max_depth;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2462 const int mb_h= s->b_height << s->block_max_depth;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2463 int x, y, mb_x;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2464 int block_size = MB_SIZE >> s->block_max_depth;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2465 int block_w = plane_index ? block_size/2 : block_size;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2466 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2467 int obmc_stride= plane_index ? block_size : 2*block_size;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2468 int ref_stride= s->current_picture.linesize[plane_index];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2469 uint8_t *dst8= s->current_picture.data[plane_index];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2470 int w= p->width;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2471 int h= p->height;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2472 START_TIMER
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2473
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2474 if(s->keyframe || (s->avctx->debug&512)){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2475 if(mb_y==mb_h)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2476 return;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2477
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2478 if(add){
2604 b7e6c3d31c65 Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/) michael parents: 2602 diff changeset	2479 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2480 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2481 // DWTELEM * line = slice_buffer_get_line(sb, y);
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2482 IDWTELEM * line = sb->line[y];
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2483 for(x=0; x<w; x++)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2484 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2485 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2486 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2487 v >>= FRAC_BITS;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2488 if(v&(~255)) v= ~(v>>31);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2489 dst8[x + y*ref_stride]= v;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2490 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2491 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2492 }else{
2604 b7e6c3d31c65 Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/) michael parents: 2602 diff changeset	2493 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2494 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2495 // DWTELEM * line = slice_buffer_get_line(sb, y);
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2496 IDWTELEM * line = sb->line[y];
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2497 for(x=0; x<w; x++)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2498 {
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2499 line[x] -= 128 << FRAC_BITS;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2500 // buf[x + y*w]-= 128<<FRAC_BITS;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2501 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2502 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2503 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2504
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2505 return;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2506 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2507
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2508 for(mb_x=0; mb_x<=mb_w; mb_x++){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2509 START_TIMER
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2510
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2511 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2512 block_w*mb_x - block_w/2,
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2513 block_w*mb_y - block_w/2,
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2514 block_w, block_w,
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2515 w, h,
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2516 w, ref_stride, obmc_stride,
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2517 mb_x - 1, mb_y - 1,
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2518 add, 0, plane_index);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2519
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2520 STOP_TIMER("add_yblock")
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2521 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2522
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2523 STOP_TIMER("predict_slice")
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2524 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	2525
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2526 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2527 Plane *p= &s->plane[plane_index];
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2528 const int mb_w= s->b_width << s->block_max_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2529 const int mb_h= s->b_height << s->block_max_depth;
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2530 int x, y, mb_x;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2531 int block_size = MB_SIZE >> s->block_max_depth;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	2532 int block_w = plane_index ? block_size/2 : block_size;
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2533 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2534 const int obmc_stride= plane_index ? block_size : 2*block_size;
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2535 int ref_stride= s->current_picture.linesize[plane_index];
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2536 uint8_t *dst8= s->current_picture.data[plane_index];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2537 int w= p->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2538 int h= p->height;
2197 ffa4fa6bebe9 10l michael parents: 2195 diff changeset	2539 START_TIMER
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2540
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2541 if(s->keyframe || (s->avctx->debug&512)){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2542 if(mb_y==mb_h)
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2543 return;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2544
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2545 if(add){
2604 b7e6c3d31c65 Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/) michael parents: 2602 diff changeset	2546 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2547 for(x=0; x<w; x++){
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2548 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2549 v >>= FRAC_BITS;
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2550 if(v&(~255)) v= ~(v>>31);
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2551 dst8[x + y*ref_stride]= v;
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2552 }
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2553 }
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2554 }else{
2604 b7e6c3d31c65 Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/) michael parents: 2602 diff changeset	2555 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2556 for(x=0; x<w; x++){
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2557 buf[x + y*w]-= 128<<FRAC_BITS;
2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	2558 }
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2559 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2560 }
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2561
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2562 return;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2563 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2564
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2565 for(mb_x=0; mb_x<=mb_w; mb_x++){
2197 ffa4fa6bebe9 10l michael parents: 2195 diff changeset	2566 START_TIMER
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2567
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2568 add_yblock(s, 0, NULL, buf, dst8, obmc,
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2569 block_w*mb_x - block_w/2,
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2570 block_w*mb_y - block_w/2,
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2571 block_w, block_w,
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2572 w, h,
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2573 w, ref_stride, obmc_stride,
713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2574 mb_x - 1, mb_y - 1,
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2575 add, 1, plane_index);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2576
2206 713ad427a3c7 20% faster predict_plane() michael parents: 2199 diff changeset	2577 STOP_TIMER("add_yblock")
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2578 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	2579
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2580 STOP_TIMER("predict_slice")
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2581 }
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2582
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2583 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2584 const int mb_h= s->b_height << s->block_max_depth;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2585 int mb_y;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2586 for(mb_y=0; mb_y<=mb_h; mb_y++)
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	2587 predict_slice(s, buf, plane_index, add, mb_y);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2588 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	2589
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2590 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2591 int i, x2, y2;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2592 Plane *p= &s->plane[plane_index];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2593 const int block_size = MB_SIZE >> s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2594 const int block_w = plane_index ? block_size/2 : block_size;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2595 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2596 const int obmc_stride= plane_index ? block_size : 2*block_size;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2597 const int ref_stride= s->current_picture.linesize[plane_index];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2598 uint8_t *src= s-> input_picture.data[plane_index];
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2599 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2600 const int b_stride = s->b_width << s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2601 const int w= p->width;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2602 const int h= p->height;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2603 int index= mb_x + mb_y*b_stride;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2604 BlockNode *b= &s->block[index];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2605 BlockNode backup= *b;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2606 int ab=0;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2607 int aa=0;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2608
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2609 b->type|= BLOCK_INTRA;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2610 b->color[plane_index]= 0;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2611 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2612
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2613 for(i=0; i<4; i++){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2614 int mb_x2= mb_x + (i &1) - 1;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2615 int mb_y2= mb_y + (i>>1) - 1;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2616 int x= block_w*mb_x2 + block_w/2;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2617 int y= block_w*mb_y2 + block_w/2;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2618
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2619 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2620 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2621
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2622 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2623 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2624 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2625 int obmc_v= obmc[index];
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2626 int d;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2627 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2628 if(x<0) obmc_v += obmc[index + block_w];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2629 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2630 if(x+block_w>w) obmc_v += obmc[index - block_w];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2631 //FIXME precalc this or simplify it somehow else
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2632
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2633 d = -dst[index] + (1<<(FRAC_BITS-1));
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2634 dst[index] = d;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2635 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2636 aa += obmc_v * obmc_v; //FIXME precalclate this
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2637 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2638 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2639 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2640 *b= backup;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2641
5127 4dbe6578f811 misc spelling fixes diego parents: 5085 diff changeset	2642 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2643 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2644
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2645 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2646 const int b_stride = s->b_width << s->block_max_depth;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2647 const int b_height = s->b_height<< s->block_max_depth;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2648 int index= x + y*b_stride;
4408 c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2649 const BlockNode *b = &s->block[index];
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2650 const BlockNode *left = x ? &s->block[index-1] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2651 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2652 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
c3be13eac2d6 Bring down the number of snow.c warnings from 27 to 17 by using the const takis parents: 4407 diff changeset	2653 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2654 int dmx, dmy;
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2655 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2656 // int my_context= av_log2(2*FFABS(left->my - top->my));
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2657
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2658 if(x<0 || x>=b_stride || y>=b_height)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2659 return 0;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2660 /*
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2661 1 0 0
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2662 01X 1-2 1
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2663 001XX 3-6 2-3
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2664 0001XXX 7-14 4-7
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2665 00001XXXX 15-30 8-15
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2666 */
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2667 //FIXME try accurate rate
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2668 //FIXME intra and inter predictors if surrounding blocks arent the same type
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2669 if(b->type & BLOCK_INTRA){
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2670 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2671 + av_log2(2*FFABS(left->color[1] - b->color[1]))
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2672 + av_log2(2*FFABS(left->color[2] - b->color[2])));
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2673 }else{
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2674 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2675 dmx-= b->mx;
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2676 dmy-= b->my;
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2677 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	2678 + av_log2(2*FFABS(dmy))
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2679 + av_log2(2*b->ref));
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	2680 }
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2681 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2682
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2683 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2684 Plane *p= &s->plane[plane_index];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2685 const int block_size = MB_SIZE >> s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2686 const int block_w = plane_index ? block_size/2 : block_size;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2687 const int obmc_stride= plane_index ? block_size : 2*block_size;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2688 const int ref_stride= s->current_picture.linesize[plane_index];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2689 uint8_t *dst= s->current_picture.data[plane_index];
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2690 uint8_t *src= s-> input_picture.data[plane_index];
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2691 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2692 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2693 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2694 const int b_stride = s->b_width << s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2695 const int b_height = s->b_height<< s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2696 const int w= p->width;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2697 const int h= p->height;
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2698 int distortion;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2699 int rate= 0;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2700 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2701 int sx= block_w*mb_x - block_w/2;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2702 int sy= block_w*mb_y - block_w/2;
3206 c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2703 int x0= FFMAX(0,-sx);
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2704 int y0= FFMAX(0,-sy);
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2705 int x1= FFMIN(block_w*2, w-sx);
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2706 int y1= FFMIN(block_w*2, h-sy);
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2707 int i,x,y;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2708
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2709 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2710
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2711 for(y=y0; y<y1; y++){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2712 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2713 const IDWTELEM *pred1 = pred + y*obmc_stride;
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2714 uint8_t *cur1 = cur + y*ref_stride;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2715 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2716 for(x=x0; x<x1; x++){
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2717 #if FRAC_BITS >= LOG2_OBMC_MAX
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2718 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2719 #else
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2720 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2721 #endif
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2722 v = (v + pred1[x]) >> FRAC_BITS;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2723 if(v&(~255)) v= ~(v>>31);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2724 dst1[x] = v;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2725 }
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2726 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2727
3206 c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2728 /* copy the regions where obmc[] = (uint8_t)256 */
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2729 if(LOG2_OBMC_MAX == 8
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2730 && (mb_x == 0 || mb_x == b_stride-1)
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2731 && (mb_y == 0 || mb_y == b_height-1)){
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2732 if(mb_x == 0)
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2733 x1 = block_w;
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2734 else
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2735 x0 = block_w;
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2736 if(mb_y == 0)
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2737 y1 = block_w;
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2738 else
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2739 y0 = block_w;
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2740 for(y=y0; y<y1; y++)
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2741 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2742 }
c1add9fe5c65 Snow mmx + sse2 part 2 corey parents: 3198 diff changeset	2743
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2744 if(block_w==16){
3323 87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2745 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2746 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2747 /* FIXME cmps overlap but don't cover the wavelet's whole support,
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2748 * so improving the score of one block is not strictly guaranteed to
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2749 * improve the score of the whole frame, so iterative motion est
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2750 * doesn't always converge. */
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2751 if(s->avctx->me_cmp == FF_CMP_W97)
4197 bbe0bc387a19 revert bad checkin mru parents: 4196 diff changeset	2752 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3323 87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2753 else if(s->avctx->me_cmp == FF_CMP_W53)
4197 bbe0bc387a19 revert bad checkin mru parents: 4196 diff changeset	2754 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3323 87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2755 else{
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2756 distortion = 0;
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2757 for(i=0; i<4; i++){
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2758 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2759 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2760 }
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2761 }
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2762 }else{
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2763 assert(block_w==8);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2764 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2765 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2766
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2767 if(plane_index==0){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2768 for(i=0; i<4; i++){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2769 /* ..RRr
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2770 * .RXx.
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2771 * rxx..
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2772 */
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2773 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2774 }
3057 8f78b00d1252 fix convergence of snow iterative_me lorenm parents: 3056 diff changeset	2775 if(mb_x == b_stride-2)
8f78b00d1252 fix convergence of snow iterative_me lorenm parents: 3056 diff changeset	2776 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2777 }
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2778 return distortion + rate*penalty_factor;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2779 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2780
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2781 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2782 int i, y2;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2783 Plane *p= &s->plane[plane_index];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2784 const int block_size = MB_SIZE >> s->block_max_depth;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2785 const int block_w = plane_index ? block_size/2 : block_size;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2786 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2787 const int obmc_stride= plane_index ? block_size : 2*block_size;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2788 const int ref_stride= s->current_picture.linesize[plane_index];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2789 uint8_t *dst= s->current_picture.data[plane_index];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2790 uint8_t *src= s-> input_picture.data[plane_index];
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	2791 static const IDWTELEM zero_dst[4096]; //FIXME
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2792 const int b_stride = s->b_width << s->block_max_depth;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2793 const int w= p->width;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2794 const int h= p->height;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2795 int distortion= 0;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2796 int rate= 0;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2797 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2798
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2799 for(i=0; i<9; i++){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2800 int mb_x2= mb_x + (i%3) - 1;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2801 int mb_y2= mb_y + (i/3) - 1;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2802 int x= block_w*mb_x2 + block_w/2;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2803 int y= block_w*mb_y2 + block_w/2;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2804
3662 fc714e9a5419 snow cosmetics: merge the sliced and non-sliced versions of add_yblock lorenm parents: 3661 diff changeset	2805 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2806 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2807
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2808 //FIXME find a cleaner/simpler way to skip the outside stuff
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2809 for(y2= y; y2<0; y2++)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2810 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2811 for(y2= h; y2<y+block_w; y2++)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2812 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2813 if(x<0){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2814 for(y2= y; y2<y+block_w; y2++)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2815 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2816 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2817 if(x+block_w > w){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2818 for(y2= y; y2<y+block_w; y2++)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2819 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2820 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2821
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2822 assert(block_w== 8 \|\| block_w==16);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2823 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2824 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2825
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2826 if(plane_index==0){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2827 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2828 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2829
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2830 /* ..RRRr
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2831 * .RXXx.
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2832 * .RXXx.
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2833 * rxxx.
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2834 */
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2835 if(merged)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2836 rate = get_block_bits(s, mb_x, mb_y, 2);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2837 for(i=merged?4:0; i<9; i++){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2838 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2839 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2840 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2841 }
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2842 return distortion + rate*penalty_factor;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2843 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2844
4283 d6f83e2f8804 rename always_inline to av_always_inline and move to common.h mru parents: 4197 diff changeset	2845 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2846 const int b_stride= s->b_width << s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2847 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2848 BlockNode backup= *block;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2849 int rd, index, value;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2850
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2851 assert(mb_x>=0 && mb_y>=0);
2994 657e8546090d 10l michael parents: 2993 diff changeset	2852 assert(mb_x<b_stride);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2853
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2854 if(intra){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2855 block->color[0] = p[0];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2856 block->color[1] = p[1];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2857 block->color[2] = p[2];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2858 block->type \|= BLOCK_INTRA;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2859 }else{
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2860 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2861 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2862 if(s->me_cache[index] == value)
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2863 return 0;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2864 s->me_cache[index]= value;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2865
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2866 block->mx= p[0];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2867 block->my= p[1];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2868 block->type &= ~BLOCK_INTRA;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2869 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2870
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2871 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2872
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2873 //FIXME chroma
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2874 if(rd < *best_rd){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2875 *best_rd= rd;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2876 return 1;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2877 }else{
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2878 *block= backup;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2879 return 0;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2880 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2881 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2882
2999 bc83e7a080a5 this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it mmu_man parents: 2998 diff changeset	2883 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
4283 d6f83e2f8804 rename always_inline to av_always_inline and move to common.h mru parents: 4197 diff changeset	2884 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2999 bc83e7a080a5 this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it mmu_man parents: 2998 diff changeset	2885 int p[2] = {p0, p1};
3197 8f53630cd24e dont pass intra flag if its always 0 michael parents: 3194 diff changeset	2886 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2999 bc83e7a080a5 this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it mmu_man parents: 2998 diff changeset	2887 }
bc83e7a080a5 this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it mmu_man parents: 2998 diff changeset	2888
4283 d6f83e2f8804 rename always_inline to av_always_inline and move to common.h mru parents: 4197 diff changeset	2889 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2890 const int b_stride= s->b_width << s->block_max_depth;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2891 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2892 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2893 int rd, index, value;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2894
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2895 assert(mb_x>=0 && mb_y>=0);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2896 assert(mb_x<b_stride);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2897 assert(((mb_x\|mb_y)&1) == 0);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2898
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2899 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2900 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2901 if(s->me_cache[index] == value)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2902 return 0;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2903 s->me_cache[index]= value;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2904
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2905 block->mx= p0;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2906 block->my= p1;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2907 block->ref= ref;
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2908 block->type &= ~BLOCK_INTRA;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2909 block[1]= block[b_stride]= block[b_stride+1]= *block;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2910
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2911 rd= get_4block_rd(s, mb_x, mb_y, 0);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2912
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2913 //FIXME chroma
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2914 if(rd < *best_rd){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2915 *best_rd= rd;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2916 return 1;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2917 }else{
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2918 block[0]= backup[0];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2919 block[1]= backup[1];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2920 block[b_stride]= backup[2];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2921 block[b_stride+1]= backup[3];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2922 return 0;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2923 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2924 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	2925
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2926 static void iterative_me(SnowContext *s){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2927 int pass, mb_x, mb_y;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2928 const int b_width = s->b_width << s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2929 const int b_height= s->b_height << s->block_max_depth;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2930 const int b_stride= b_width;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2931 int color[3];
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2932
3194 c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2933 {
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2934 RangeCoder r = s->c;
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2935 uint8_t state[sizeof(s->block_state)];
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2936 memcpy(state, s->block_state, sizeof(s->block_state));
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2937 for(mb_y= 0; mb_y<s->b_height; mb_y++)
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2938 for(mb_x= 0; mb_x<s->b_width; mb_x++)
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2939 encode_q_branch(s, 0, mb_x, mb_y);
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2940 s->c = r;
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2941 memcpy(s->block_state, state, sizeof(s->block_state));
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2942 }
c30e9bcbb716 seed iterative_me with mvs from conventional search. lorenm parents: 3193 diff changeset	2943
3323 87c54a3f8d19 Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation. lorenm parents: 3322 diff changeset	2944 for(pass=0; pass<25; pass++){
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2945 int change= 0;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2946
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2947 for(mb_y= 0; mb_y<b_height; mb_y++){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2948 for(mb_x= 0; mb_x<b_width; mb_x++){
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2949 int dia_change, i, j, ref;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2950 int best_rd= INT_MAX, ref_rd;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	2951 BlockNode backup, ref_b;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2952 const int index= mb_x + mb_y * b_stride;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2953 BlockNode *block= &s->block[index];
3324 dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2954 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2955 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2956 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2957 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2958 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2959 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2960 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	2961 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2962 const int b_w= (MB_SIZE >> s->block_max_depth);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2963 uint8_t obmc_edged[b_w*2][b_w*2];
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2964
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2965 if(pass && (block->type & BLOCK_OPT))
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2966 continue;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2967 block->type \|= BLOCK_OPT;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2968
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2969 backup= *block;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2970
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2971 if(!s->me_cache_generation)
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2972 memset(s->me_cache, 0, sizeof(s->me_cache));
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2973 s->me_cache_generation += 1<<22;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	2974
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2975 //FIXME precalc
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2976 {
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2977 int x, y;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2978 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2979 if(mb_x==0)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2980 for(y=0; y<b_w*2; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2981 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2982 if(mb_x==b_stride-1)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2983 for(y=0; y<b_w*2; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2984 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2985 if(mb_y==0){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2986 for(x=0; x<b_w*2; x++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2987 obmc_edged[0][x] += obmc_edged[b_w-1][x];
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2988 for(y=1; y<b_w; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2989 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2990 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2991 if(mb_y==b_height-1){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2992 for(x=0; x<b_w*2; x++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2993 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2994 for(y=b_w; y<b_w*2-1; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2995 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2996 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2997 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2998
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	2999 //skip stuff outside the picture
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3000 if(mb_x==0 \|\| mb_y==0 \|\| mb_x==b_width-1 \|\| mb_y==b_height-1)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3001 {
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3002 uint8_t *src= s-> input_picture.data[0];
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3003 uint8_t *dst= s->current_picture.data[0];
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3004 const int stride= s->current_picture.linesize[0];
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3005 const int block_w= MB_SIZE >> s->block_max_depth;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3006 const int sx= block_w*mb_x - block_w/2;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3007 const int sy= block_w*mb_y - block_w/2;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3008 const int w= s->plane[0].width;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3009 const int h= s->plane[0].height;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3010 int y;
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3011
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3012 for(y=sy; y<0; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3013 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3014 for(y=h; y<sy+block_w*2; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3015 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3016 if(sx<0){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3017 for(y=sy; y<sy+block_w*2; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3018 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3019 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3020 if(sx+block_w*2 > w){
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3021 for(y=sy; y<sy+block_w*2; y++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3022 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3023 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3024 }
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3025
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3026 // intra(black) = neighbors' contribution to the current block
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3027 for(i=0; i<3; i++)
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3028 color[i]= get_dc(s, mb_x, mb_y, i);
e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3029
5127 4dbe6578f811 misc spelling fixes diego parents: 5085 diff changeset	3030 // get previous score (cannot be cached due to OBMC)
3057 8f78b00d1252 fix convergence of snow iterative_me lorenm parents: 3056 diff changeset	3031 if(pass > 0 && (block->type&BLOCK_INTRA)){
8f78b00d1252 fix convergence of snow iterative_me lorenm parents: 3056 diff changeset	3032 int color0[3]= {block->color[0], block->color[1], block->color[2]};
8f78b00d1252 fix convergence of snow iterative_me lorenm parents: 3056 diff changeset	3033 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
8f78b00d1252 fix convergence of snow iterative_me lorenm parents: 3056 diff changeset	3034 }else
3197 8f53630cd24e dont pass intra flag if its always 0 michael parents: 3194 diff changeset	3035 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
8f53630cd24e dont pass intra flag if its always 0 michael parents: 3194 diff changeset	3036
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3037 ref_b= *block;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3038 ref_rd= best_rd;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3039 for(ref=0; ref < s->ref_frames; ref++){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3040 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3041 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3042 continue;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3043 block->ref= ref;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3044 best_rd= INT_MAX;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3045
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3046 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3047 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3324 dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3048 if(tb)
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3049 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3324 dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3050 if(lb)
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3051 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3324 dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3052 if(rb)
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3053 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3324 dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3054 if(bb)
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3055 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3056
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3057 /* fullpel ME */
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3058 //FIXME avoid subpel interpol / round to nearest integer
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3059 do{
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3060 dia_change=0;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3061 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3062 for(j=0; j<i; j++){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3063 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3064 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3065 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3066 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3067 }
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3068 }
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3069 }while(dia_change);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3070 /* subpel ME */
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3071 do{
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3072 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3073 dia_change=0;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3074 for(i=0; i<8; i++)
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3075 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3076 }while(dia_change);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3077 //FIXME or try the standard 2 pass qpel or similar
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3078
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3079 mvr[0][0]= block->mx;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3080 mvr[0][1]= block->my;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3081 if(ref_rd > best_rd){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3082 ref_rd= best_rd;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3083 ref_b= *block;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3084 }
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3085 }
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3086 best_rd= ref_rd;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3087 *block= ref_b;
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	3088 #if 1
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3089 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3090 //FIXME RD style color selection
2998 733ceb1e079e mem corruption fix michael parents: 2995 diff changeset	3091 #endif
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3092 if(!same_block(block, &backup)){
3324 dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3093 if(tb ) tb ->type &= ~BLOCK_OPT;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3094 if(lb ) lb ->type &= ~BLOCK_OPT;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3095 if(rb ) rb ->type &= ~BLOCK_OPT;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3096 if(bb ) bb ->type &= ~BLOCK_OPT;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3097 if(tlb) tlb->type &= ~BLOCK_OPT;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3098 if(trb) trb->type &= ~BLOCK_OPT;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3099 if(blb) blb->type &= ~BLOCK_OPT;
dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3100 if(brb) brb->type &= ~BLOCK_OPT;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3101 change ++;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3102 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3103 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3104 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3105 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3106 if(!change)
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3107 break;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3108 }
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3109
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3110 if(s->block_max_depth == 1){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3111 int change= 0;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3112 for(mb_y= 0; mb_y<b_height; mb_y+=2){
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3113 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3324 dbb617c134ff Snow: cosmetics lorenm parents: 3323 diff changeset	3114 int i;
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3115 int best_rd, init_rd;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3116 const int index= mb_x + mb_y * b_stride;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3117 BlockNode *b[4];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3118
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3119 b[0]= &s->block[index];
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3120 b[1]= b[0]+1;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3121 b[2]= b[0]+b_stride;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3122 b[3]= b[2]+1;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3123 if(same_block(b[0], b[1]) &&
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3124 same_block(b[0], b[2]) &&
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3125 same_block(b[0], b[3]))
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3126 continue;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3127
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3128 if(!s->me_cache_generation)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3129 memset(s->me_cache, 0, sizeof(s->me_cache));
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3130 s->me_cache_generation += 1<<22;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3131
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3132 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3133
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3134 //FIXME more multiref search?
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3135 check_4block_inter(s, mb_x, mb_y,
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3136 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3137 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3138
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3139 for(i=0; i<4; i++)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3140 if(!(b[i]->type&BLOCK_INTRA))
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3141 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3051 c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3142
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3143 if(init_rd != best_rd)
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3144 change++;
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3145 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3146 }
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3147 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
c0fde3eb7784 obmc-aware 4mv lorenm parents: 3036 diff changeset	3148 }
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3149 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3150
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3151 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3152 const int level= b->level;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3153 const int w= b->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3154 const int h= b->height;
4594 a96d905dcbaa Add av_ prefix to clip functions reimar parents: 4588 diff changeset	3155 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
5575 a6f5ed050335 use more bits on the encoder side michael parents: 5572 diff changeset	3156 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3157 int x,y, thres1, thres2;
2893 6f8bcb169256 fix unused variable warnings aurel parents: 2842 diff changeset	3158 // START_TIMER
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3159
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3160 if(s->qlog == LOSSLESS_QLOG){
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3161 for(y=0; y<h; y++)
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3162 for(x=0; x<w; x++)
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3163 dst[x + y*stride]= src[x + y*stride];
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3164 return;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3165 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3166
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3167 bias= bias ? 0 : (3*qmul)>>3;
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3168 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3169 thres2= 2*thres1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3170
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3171 if(!bias){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3172 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3173 for(x=0; x<w; x++){
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3174 int i= src[x + y*stride];
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3175
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3176 if((unsigned)(i+thres1) > thres2){
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3177 if(i>=0){
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3178 i<<= QEXPSHIFT;
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3179 i/= qmul; //FIXME optimize
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3180 dst[x + y*stride]= i;
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3181 }else{
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3182 i= -i;
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3183 i<<= QEXPSHIFT;
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3184 i/= qmul; //FIXME optimize
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3185 dst[x + y*stride]= -i;
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3186 }
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3187 }else
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3188 dst[x + y*stride]= 0;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3189 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3190 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3191 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3192 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3193 for(x=0; x<w; x++){
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3194 int i= src[x + y*stride];
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3195
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3196 if((unsigned)(i+thres1) > thres2){
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3197 if(i>=0){
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3198 i<<= QEXPSHIFT;
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3199 i= (i + bias) / qmul; //FIXME optimize
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3200 dst[x + y*stride]= i;
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3201 }else{
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3202 i= -i;
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3203 i<<= QEXPSHIFT;
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3204 i= (i + bias) / qmul; //FIXME optimize
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3205 dst[x + y*stride]= -i;
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3206 }
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3207 }else
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3208 dst[x + y*stride]= 0;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3209 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3210 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3211 }
2150 8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3212 if(level+1 == s->spatial_decomposition_count){
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3213 // STOP_TIMER("quantize")
8aff375a986b optimize quantizaton (about 3x faster) michael parents: 2149 diff changeset	3214 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3215 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3216
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3217 static void dequantize_slice_buffered(SnowContext *s, slice_buffer *sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3218 const int w= b->width;
4594 a96d905dcbaa Add av_ prefix to clip functions reimar parents: 4588 diff changeset	3219 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
2600 2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3220 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3221 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3222 int x,y;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3223 START_TIMER
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3224
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3225 if(s->qlog == LOSSLESS_QLOG) return;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3226
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	3227 for(y=start_y; y<end_y; y++){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3228 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3229 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3230 for(x=0; x<w; x++){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3231 int i= line[x];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3232 if(i<0){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3233 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3234 }else if(i>0){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3235 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3236 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3237 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3238 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3239 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3240 STOP_TIMER("dquant")
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3241 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3242 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3243
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3244 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3245 const int w= b->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3246 const int h= b->height;
4594 a96d905dcbaa Add av_ prefix to clip functions reimar parents: 4588 diff changeset	3247 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
2600 2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3248 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3249 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3250 int x,y;
2195 df3079d7806f merge decode_subband() and dequantize() michael parents: 2194 diff changeset	3251 START_TIMER
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3252
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	3253 if(s->qlog == LOSSLESS_QLOG) return;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3254
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3255 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3256 for(x=0; x<w; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3257 int i= src[x + y*stride];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3258 if(i<0){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3259 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3260 }else if(i>0){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3261 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3262 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3263 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3264 }
2195 df3079d7806f merge decode_subband() and dequantize() michael parents: 2194 diff changeset	3265 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
df3079d7806f merge decode_subband() and dequantize() michael parents: 2194 diff changeset	3266 STOP_TIMER("dquant")
df3079d7806f merge decode_subband() and dequantize() michael parents: 2194 diff changeset	3267 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3268 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3269
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3270 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3271 const int w= b->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3272 const int h= b->height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3273 int x,y;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3274
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3275 for(y=h-1; y>=0; y--){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3276 for(x=w-1; x>=0; x--){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3277 int i= x + y*stride;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3278
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3279 if(x){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3280 if(use_median){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3281 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3282 else src[i] -= src[i - 1];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3283 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3284 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3285 else src[i] -= src[i - 1];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3286 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3287 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3288 if(y) src[i] -= src[i - stride];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3289 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3290 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3291 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3292 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3293
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3294 static void correlate_slice_buffered(SnowContext s, slice_buffer sb, SubBand b, IDWTELEM src, int stride, int inverse, int use_median, int start_y, int end_y){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3295 const int w= b->width;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3296 int x,y;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3297
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3298 // START_TIMER
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3299
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3300 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3301 IDWTELEM * prev;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3302
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	3303 if (start_y != 0)
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	3304 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3305
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	3306 for(y=start_y; y<end_y; y++){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3307 prev = line;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3308 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3309 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3310 for(x=0; x<w; x++){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3311 if(x){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3312 if(use_median){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3313 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3314 else line[x] += line[x - 1];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3315 }else{
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3316 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3317 else line[x] += line[x - 1];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3318 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3319 }else{
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3320 if(y) line[x] += prev[x];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3321 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3322 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3323 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3324
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3325 // STOP_TIMER("correlate")
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3326 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3327
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3328 static void correlate(SnowContext s, SubBand b, IDWTELEM *src, int stride, int inverse, int use_median){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3329 const int w= b->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3330 const int h= b->height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3331 int x,y;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3332
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3333 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3334 for(x=0; x<w; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3335 int i= x + y*stride;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3336
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3337 if(x){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3338 if(use_median){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3339 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3340 else src[i] += src[i - 1];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3341 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3342 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3343 else src[i] += src[i - 1];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3344 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3345 }else{
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3346 if(y) src[i] += src[i - stride];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3347 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3348 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3349 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3350 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3351
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3352 static void encode_header(SnowContext *s){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3353 int plane_index, level, orientation;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3354 uint8_t kstate[32];
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3355
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3356 memset(kstate, MID_STATE, sizeof(kstate));
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3357
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3358 put_rac(&s->c, kstate, s->keyframe);
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3359 if(s->keyframe \|\| s->always_reset){
2199 e0b08bdf565d 10l (keyframes and context resets) michael parents: 2198 diff changeset	3360 reset_contexts(s);
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3361 s->last_spatial_decomposition_type=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3362 s->last_qlog=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3363 s->last_qbias=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3364 s->last_mv_scale=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3365 s->last_block_max_depth= 0;
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3366 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3367 if(s->keyframe){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3368 put_symbol(&s->c, s->header_state, s->version, 0);
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3369 put_rac(&s->c, s->header_state, s->always_reset);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3370 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3371 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3372 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3373 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3374 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3375 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3376 put_rac(&s->c, s->header_state, s->spatial_scalability);
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3377 // put_rac(&s->c, s->header_state, s->rate_scalability);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3378 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3379
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3380 for(plane_index=0; plane_index<2; plane_index++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3381 for(level=0; level<s->spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3382 for(orientation=level ? 1:0; orientation<4; orientation++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3383 if(orientation==2) continue;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3384 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3385 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3386 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3387 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3388 }
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3389 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3390 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3391 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3392 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3393 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3394
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3395 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3396 s->last_qlog = s->qlog;
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3397 s->last_qbias = s->qbias;
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3398 s->last_mv_scale = s->mv_scale;
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3399 s->last_block_max_depth = s->block_max_depth;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3400 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3401
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3402 static int decode_header(SnowContext *s){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3403 int plane_index, level, orientation;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3404 uint8_t kstate[32];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3405
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3406 memset(kstate, MID_STATE, sizeof(kstate));
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3407
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3408 s->keyframe= get_rac(&s->c, kstate);
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3409 if(s->keyframe \|\| s->always_reset){
2199 e0b08bdf565d 10l (keyframes and context resets) michael parents: 2198 diff changeset	3410 reset_contexts(s);
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3411 s->spatial_decomposition_type=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3412 s->qlog=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3413 s->qbias=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3414 s->mv_scale=
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3415 s->block_max_depth= 0;
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3416 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3417 if(s->keyframe){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3418 s->version= get_symbol(&s->c, s->header_state, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3419 if(s->version>0){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3420 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3421 return -1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3422 }
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3423 s->always_reset= get_rac(&s->c, s->header_state);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3424 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3425 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3426 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3427 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3428 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3429 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3430 s->spatial_scalability= get_rac(&s->c, s->header_state);
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3431 // s->rate_scalability= get_rac(&s->c, s->header_state);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3432 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3433
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3434 for(plane_index=0; plane_index<3; plane_index++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3435 for(level=0; level<s->spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3436 for(orientation=level ? 1:0; orientation<4; orientation++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3437 int q;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3438 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3439 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3440 else q= get_symbol(&s->c, s->header_state, 1);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3441 s->plane[plane_index].band[level][orientation].qlog= q;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3442 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3443 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3444 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3445 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3446
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3447 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
5588 effa59ca89b3 we only have 2 wavelets, the 3rd was just for experimentation ... michael parents: 5587 diff changeset	3448 if(s->spatial_decomposition_type > 1){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3449 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3450 return -1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3451 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3452
4331 e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3453 s->qlog += get_symbol(&s->c, s->header_state, 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3454 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3455 s->qbias += get_symbol(&s->c, s->header_state, 1);
e571dfe677be store a few values in the header as difference to the last michael parents: 4283 diff changeset	3456 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3303 68721b62a528 sanity checks, some might have been exploitable ... michael parents: 3206 diff changeset	3457 if(s->block_max_depth > 1 \|\| s->block_max_depth < 0){
2952 d7be13d3ef00 check block_max_depth (fixes crash) michael parents: 2951 diff changeset	3458 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
d7be13d3ef00 check block_max_depth (fixes crash) michael parents: 2951 diff changeset	3459 s->block_max_depth= 0;
d7be13d3ef00 check block_max_depth (fixes crash) michael parents: 2951 diff changeset	3460 return -1;
d7be13d3ef00 check block_max_depth (fixes crash) michael parents: 2951 diff changeset	3461 }
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3462
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3463 return 0;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3464 }
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3465
3075 961af1358c7f add static keyword to some functions mru parents: 3063 diff changeset	3466 static void init_qexp(void){
2600 2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3467 int i;
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3468 double v=128;
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3469
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3470 for(i=0; i<QROOT; i++){
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3471 qexp[i]= lrintf(v);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3472 v *= pow(2, 1.0 / QROOT);
2600 2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3473 }
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3474 }
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3475
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3476 static int common_init(AVCodecContext *avctx){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3477 SnowContext *s = avctx->priv_data;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3478 int width, height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3479 int level, orientation, plane_index, dec;
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	3480 int i, j;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3481
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3482 s->avctx= avctx;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3483
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3484 dsputil_init(&s->dsp, avctx);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3485
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3486 #define mcf(dx,dy)\
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3487 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3488 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2224 11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3489 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3490 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3491 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3492 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3493
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3494 mcf( 0, 0)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3495 mcf( 4, 0)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3496 mcf( 8, 0)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3497 mcf(12, 0)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3498 mcf( 0, 4)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3499 mcf( 4, 4)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3500 mcf( 8, 4)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3501 mcf(12, 4)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3502 mcf( 0, 8)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3503 mcf( 4, 8)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3504 mcf( 8, 8)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3505 mcf(12, 8)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3506 mcf( 0,12)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3507 mcf( 4,12)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3508 mcf( 8,12)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3509 mcf(12,12)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3510
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3511 #define mcfh(dx,dy)\
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3512 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3513 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2224 11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3514 mc_block_hpel ## dx ## dy ## 16;\
11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3515 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3516 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
11d54cb7ac4e 100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...) michael parents: 2223 diff changeset	3517 mc_block_hpel ## dx ## dy ## 8;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3518
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3519 mcfh(0, 0)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3520 mcfh(8, 0)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3521 mcfh(0, 8)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3522 mcfh(8, 8)
2600 2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3523
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3524 if(!qexp[0])
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3525 init_qexp();
2bcea6618a87 fix QROOT != 8 michael parents: 2596 diff changeset	3526
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3527 dec= s->spatial_decomposition_count= 5;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3528 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3529
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3530 s->chroma_h_shift= 1; //FIXME XXX
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3531 s->chroma_v_shift= 1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3532
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3533 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3534
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3535 width= s->avctx->width;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3536 height= s->avctx->height;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3537
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3538 s->spatial_idwt_buffer= av_mallocz(widthheightsizeof(IDWTELEM));
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3539 s->spatial_dwt_buffer= av_mallocz(widthheightsizeof(DWTELEM)); //FIXME this doesnt belong here
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3540
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3541 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3542 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3543
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3544 for(plane_index=0; plane_index<3; plane_index++){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3545 int w= s->avctx->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3546 int h= s->avctx->height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3547
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3548 if(plane_index){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3549 w>>= s->chroma_h_shift;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3550 h>>= s->chroma_v_shift;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3551 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3552 s->plane[plane_index].width = w;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3553 s->plane[plane_index].height= h;
2160 11e1425b3a66 cleanup michael parents: 2159 diff changeset	3554 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3555 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3556 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3557 SubBand *b= &s->plane[plane_index].band[level][orientation];
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3558
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3559 b->buf= s->spatial_dwt_buffer;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3560 b->level= level;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3561 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3562 b->width = (w + !(orientation&1))>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3563 b->height= (h + !(orientation>1))>>1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3564
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3565 b->stride_line = 1 << (s->spatial_decomposition_count - level);
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3566 b->buf_x_offset = 0;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3567 b->buf_y_offset = 0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3568
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3569 if(orientation&1){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3570 b->buf += (w+1)>>1;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3571 b->buf_x_offset = (w+1)>>1;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3572 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3573 if(orientation>1){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3574 b->buf += b->stride>>1;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3575 b->buf_y_offset = b->stride_line >> 1;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3576 }
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3577 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3578
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3579 if(level)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3580 b->parent= &s->plane[plane_index].band[level-1][orientation];
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	3581 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3582 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3583 w= (w+1)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3584 h= (h+1)>>1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3585 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3586 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3587
3325 c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	3588 for(i=0; i<MAX_REF_FRAMES; i++)
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	3589 for(j=0; j<MAX_REF_FRAMES; j++)
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	3590 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
c2a017de6bea Snow: scale predicted mv based on which reference frame the neighbors used. lorenm parents: 3324 diff changeset	3591
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3592 reset_contexts(s);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3593 /*
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3594 width= s->width= avctx->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3595 height= s->height= avctx->height;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3596
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3597 assert(width && height);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3598 */
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3599 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3600
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3601 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3602 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3603
3322 0b4f548dfb44 Snow: 10l. 1pass ratecontrol failed to set snow's internal quant. lorenm parents: 3314 diff changeset	3604 static int qscale2qlog(int qscale){
0b4f548dfb44 Snow: 10l. 1pass ratecontrol failed to set snow's internal quant. lorenm parents: 3314 diff changeset	3605 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
0b4f548dfb44 Snow: 10l. 1pass ratecontrol failed to set snow's internal quant. lorenm parents: 3314 diff changeset	3606 + 61*QROOT/8; //<64 >60
0b4f548dfb44 Snow: 10l. 1pass ratecontrol failed to set snow's internal quant. lorenm parents: 3314 diff changeset	3607 }
0b4f548dfb44 Snow: 10l. 1pass ratecontrol failed to set snow's internal quant. lorenm parents: 3314 diff changeset	3608
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3609 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3610 {
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3611 /* estimate the frame's complexity as a sum of weighted dwt coefs.
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3612 * FIXME we know exact mv bits at this point,
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3613 * but ratecontrol isn't set up to include them. */
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3614 uint32_t coef_sum= 0;
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3615 int level, orientation, delta_qlog;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3616
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3617 for(level=0; level<s->spatial_decomposition_count; level++){
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3618 for(orientation=level ? 1 : 0; orientation<4; orientation++){
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3619 SubBand *b= &s->plane[0].band[level][orientation];
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3620 IDWTELEM *buf= b->ibuf;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3621 const int w= b->width;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3622 const int h= b->height;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3623 const int stride= b->stride;
4594 a96d905dcbaa Add av_ prefix to clip functions reimar parents: 4588 diff changeset	3624 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3625 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3626 const int qdiv= (1<<16)/qmul;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3627 int x, y;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3628 //FIXME this is ugly
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3629 for(y=0; y<h; y++)
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3630 for(x=0; x<w; x++)
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3631 buf[x+y*stride]= b->buf[x+y*stride];
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3632 if(orientation==0)
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3633 decorrelate(s, b, buf, stride, 1, 0);
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3634 for(y=0; y<h; y++)
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3635 for(x=0; x<w; x++)
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3636 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3637 }
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3638 }
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3639
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3640 /* ugly, ratecontrol just takes a sqrt again */
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3641 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3642 assert(coef_sum < INT_MAX);
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3643
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3644 if(pict->pict_type == I_TYPE){
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3645 s->m.current_picture.mb_var_sum= coef_sum;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3646 s->m.current_picture.mc_mb_var_sum= 0;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3647 }else{
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3648 s->m.current_picture.mc_mb_var_sum= coef_sum;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3649 s->m.current_picture.mb_var_sum= 0;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3650 }
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3651
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3652 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3766 acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	3653 if (pict->quality < 0)
4011 5bce97c30a69 -1 is a valid return value in ratecontrol_1pass() -> 100l for takis michael parents: 4001 diff changeset	3654 return INT_MIN;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3655 s->lambda= pict->quality * 3/2;
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3656 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3657 s->qlog+= delta_qlog;
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3658 return delta_qlog;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3659 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3660
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3661 static void calculate_vissual_weight(SnowContext *s, Plane *p){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3662 int width = p->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3663 int height= p->height;
2198 970c2de19b2e cleanup michael parents: 2197 diff changeset	3664 int level, orientation, x, y;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3665
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3666 for(level=0; level<s->spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3667 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3668 SubBand *b= &p->band[level][orientation];
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3669 IDWTELEM *ibuf= b->ibuf;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3670 int64_t error=0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3671
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3672 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3673 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3674 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3675 for(y=0; y<height; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3676 for(x=0; x<width; x++){
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3677 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3678 error += d*d;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3679 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3680 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3681
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3682 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2164 cbac56a6244f cleanup michael parents: 2161 diff changeset	3683 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3684 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3685 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3686 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3687
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3688 static int encode_init(AVCodecContext *avctx)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3689 {
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3690 SnowContext *s = avctx->priv_data;
2198 970c2de19b2e cleanup michael parents: 2197 diff changeset	3691 int plane_index;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3692
2658 d1609cfeb1d0 #defines for strict_std_compliance and split between inofficial extensions and non standarized things michael parents: 2635 diff changeset	3693 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2628 511e3afc43e1 Ministry of English Composition, reporting for duty (and the word is "skipped", not "skiped"; "skiped" would rhyme with "hyped") melanson parents: 2610 diff changeset	3694 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2658 d1609cfeb1d0 #defines for strict_std_compliance and split between inofficial extensions and non standarized things michael parents: 2635 diff changeset	3695 "use vstrict=-2 / -strict -2 to use it anyway\n");
2151 44c973bfec0f dont segfault without -strict -1 michael parents: 2150 diff changeset	3696 return -1;
44c973bfec0f dont segfault without -strict -1 michael parents: 2150 diff changeset	3697 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3698
3327 955096780e7c Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless. lorenm parents: 3326 diff changeset	3699 if(avctx->prediction_method == DWT_97
955096780e7c Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless. lorenm parents: 3326 diff changeset	3700 && (avctx->flags & CODEC_FLAG_QSCALE)
955096780e7c Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless. lorenm parents: 3326 diff changeset	3701 && avctx->global_quality == 0){
955096780e7c Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless. lorenm parents: 3326 diff changeset	3702 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
955096780e7c Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless. lorenm parents: 3326 diff changeset	3703 return -1;
955096780e7c Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless. lorenm parents: 3326 diff changeset	3704 }
955096780e7c Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless. lorenm parents: 3326 diff changeset	3705
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3706 common_init(avctx);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3707 alloc_blocks(s);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3708
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3709 s->version=0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3710
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3711 s->m.avctx = avctx;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3712 s->m.flags = avctx->flags;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3713 s->m.bit_rate= avctx->bit_rate;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3714
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3715 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3716 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3717 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	3718 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3719 h263_encode_init(&s->m); //mv_penalty
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3720
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3721 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3722
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3723 if(avctx->flags&CODEC_FLAG_PASS1){
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3724 if(!avctx->stats_out)
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3725 avctx->stats_out = av_mallocz(256);
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3726 }
3322 0b4f548dfb44 Snow: 10l. 1pass ratecontrol failed to set snow's internal quant. lorenm parents: 3314 diff changeset	3727 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3728 if(ff_rate_control_init(&s->m) < 0)
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3729 return -1;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3730 }
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3731 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3732
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3733 for(plane_index=0; plane_index<3; plane_index++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3734 calculate_vissual_weight(s, &s->plane[plane_index]);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3735 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3736
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3737
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3738 avctx->coded_frame= &s->current_picture;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3739 switch(avctx->pix_fmt){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3740 // case PIX_FMT_YUV444P:
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3741 // case PIX_FMT_YUV422P:
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3742 case PIX_FMT_YUV420P:
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3743 case PIX_FMT_GRAY8:
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3744 // case PIX_FMT_YUV411P:
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3745 // case PIX_FMT_YUV410P:
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3746 s->colorspace_type= 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3747 break;
4494 ce643a22f049 Replace deprecated PIX_FMT names by the newer variants. diego parents: 4436 diff changeset	3748 /* case PIX_FMT_RGB32:
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3749 s->colorspace= 1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3750 break;*/
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3751 default:
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3752 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3753 return -1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3754 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3755 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3756 s->chroma_h_shift= 1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3757 s->chroma_v_shift= 1;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3758
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3759 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3760 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3761
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3762 s->avctx->get_buffer(s->avctx, &s->input_picture);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3763
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3764 if(s->avctx->me_method == ME_ITER){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3765 int i;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3766 int size= s->b_width * s->b_height << 2*s->block_max_depth;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3767 for(i=0; i<s->max_ref_frames; i++){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3768 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3769 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3770 }
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3771 }
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3772
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3773 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3774 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3775
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3776 static int frame_start(SnowContext *s){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3777 AVFrame tmp;
2187 eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3778 int w= s->avctx->width; //FIXME round up to x16 ?
eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3779 int h= s->avctx->height;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3780
2187 eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3781 if(s->current_picture.data[0]){
eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3782 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3783 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3784 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3785 }
eedd352930b2 edge bugfix michael parents: 2164 diff changeset	3786
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3787 tmp= s->last_picture[s->max_ref_frames-1];
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3788 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3789 s->last_picture[0]= s->current_picture;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3790 s->current_picture= tmp;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3791
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3792 if(s->keyframe){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3793 s->ref_frames= 0;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3794 }else{
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3795 int i;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3796 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3797 if(i && s->last_picture[i-1].key_frame)
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3798 break;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3799 s->ref_frames= i;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3800 }
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3801
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3802 s->current_picture.reference= 1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3803 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3804 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3805 return -1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3806 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3807
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3808 s->current_picture.key_frame= s->keyframe;
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3809
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3810 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3811 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3812
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3813 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3814 SnowContext *s = avctx->priv_data;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3815 RangeCoder * const c= &s->c;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3816 AVFrame *pict = data;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3817 const int width= s->avctx->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3818 const int height= s->avctx->height;
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3819 int level, orientation, plane_index, i, y;
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3820 uint8_t rc_header_bak[sizeof(s->header_state)];
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3821 uint8_t rc_block_bak[sizeof(s->block_state)];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3822
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3823 ff_init_range_encoder(c, buf, buf_size);
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3824 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3825
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3826 for(i=0; i<3; i++){
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3827 int shift= !!i;
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3828 for(y=0; y<(height>>shift); y++)
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3829 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3830 &pict->data[i][y * pict->linesize[i]],
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3831 width>>shift);
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3832 }
cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3833 s->new_picture = *pict;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3834
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3835 s->m.picture_number= avctx->frame_number;
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3836 if(avctx->flags&CODEC_FLAG_PASS2){
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3837 s->m.pict_type =
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3838 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3839 s->keyframe= pict->pict_type==FF_I_TYPE;
3766 acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	3840 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3193 66116775b315 obey vqscale in 2nd pass lorenm parents: 3190 diff changeset	3841 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3766 acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	3842 if (pict->quality < 0)
acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	3843 return -1;
acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	3844 }
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3845 }else{
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3846 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3847 s->m.pict_type=
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3848 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3849 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3850
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3851 if(s->pass1_rc && avctx->frame_number == 0)
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3852 pict->quality= 2*FF_QP2LAMBDA;
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	3853 if(pict->quality){
3322 0b4f548dfb44 Snow: 10l. 1pass ratecontrol failed to set snow's internal quant. lorenm parents: 3314 diff changeset	3854 s->qlog= qscale2qlog(pict->quality);
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3855 s->lambda = pict->quality * 3/2;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3856 }
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3857 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	3858 s->qlog= LOSSLESS_QLOG;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3859 s->lambda = 0;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3860 }//else keep previous frame's qlog until after motion est
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3861
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3862 frame_start(s);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3863
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3864 s->m.current_picture_ptr= &s->m.current_picture;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3865 if(pict->pict_type == P_TYPE){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3866 int block_width = (width +15)>>4;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3867 int block_height= (height+15)>>4;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3868 int stride= s->current_picture.linesize[0];
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3869
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3870 assert(s->current_picture.data[0]);
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3871 assert(s->last_picture[0].data[0]);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3872
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3873 s->m.avctx= s->avctx;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3874 s->m.current_picture.data[0]= s->current_picture.data[0];
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3875 s->m. last_picture.data[0]= s->last_picture[0].data[0];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3876 s->m. new_picture.data[0]= s-> input_picture.data[0];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3877 s->m. last_picture_ptr= &s->m. last_picture;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3878 s->m.linesize=
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3879 s->m. last_picture.linesize[0]=
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3880 s->m. new_picture.linesize[0]=
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3881 s->m.current_picture.linesize[0]= stride;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3882 s->m.uvlinesize= s->current_picture.linesize[1];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3883 s->m.width = width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3884 s->m.height= height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3885 s->m.mb_width = block_width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3886 s->m.mb_height= block_height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3887 s->m.mb_stride= s->m.mb_width+1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3888 s->m.b8_stride= 2*s->m.mb_width+1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3889 s->m.f_code=1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3890 s->m.pict_type= pict->pict_type;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3891 s->m.me_method= s->avctx->me_method;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3892 s->m.me.scene_change_score=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3893 s->m.flags= s->avctx->flags;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3894 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3895 s->m.out_format= FMT_H263;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3896 s->m.unrestricted_mv= 1;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3897
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3898 s->m.lambda = s->lambda;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3899 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3900 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3901
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3902 s->m.dsp= s->dsp; //move
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3903 ff_init_me(&s->m);
2993 cb0e26759cca iterative overlapped block based motion estimation for snow michael parents: 2979 diff changeset	3904 s->dsp= s->m.dsp;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3905 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3906
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3907 if(s->pass1_rc){
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3908 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3909 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3910 }
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3911
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3912 redo_frame:
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3913
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3914 s->m.pict_type = pict->pict_type;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3915 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3916
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3917 encode_header(s);
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3918 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3919 encode_blocks(s, 1);
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	3920 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3921
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3922 for(plane_index=0; plane_index<3; plane_index++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3923 Plane *p= &s->plane[plane_index];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3924 int w= p->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3925 int h= p->height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3926 int x, y;
2198 970c2de19b2e cleanup michael parents: 2197 diff changeset	3927 // int bits= put_bits_count(&s->c.pb);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3928
3338 937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	3929 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3930 //FIXME optimize
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3931 if(pict->data[plane_index]) //FIXME gray hack
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3932 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3933 for(x=0; x<w; x++){
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3934 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3935 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3936 }
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3937 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3938
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3939 if( plane_index==0
ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3940 && pict->pict_type == P_TYPE
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3941 && !(avctx->flags&CODEC_FLAG_PASS2)
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3942 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3943 ff_init_range_encoder(c, buf, buf_size);
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	3944 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3945 pict->pict_type= FF_I_TYPE;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3946 s->keyframe=1;
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	3947 s->current_picture.key_frame=1;
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3948 goto redo_frame;
70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	3949 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3950
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	3951 if(s->qlog == LOSSLESS_QLOG){
ec7789e19e43 lossless support michael parents: 2160 diff changeset	3952 for(y=0; y<h; y++){
ec7789e19e43 lossless support michael parents: 2160 diff changeset	3953 for(x=0; x<w; x++){
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3954 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	3955 }
ec7789e19e43 lossless support michael parents: 2160 diff changeset	3956 }
5575 a6f5ed050335 use more bits on the encoder side michael parents: 5572 diff changeset	3957 }else{
a6f5ed050335 use more bits on the encoder side michael parents: 5572 diff changeset	3958 for(y=0; y<h; y++){
a6f5ed050335 use more bits on the encoder side michael parents: 5572 diff changeset	3959 for(x=0; x<w; x++){
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3960 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
5575 a6f5ed050335 use more bits on the encoder side michael parents: 5572 diff changeset	3961 }
a6f5ed050335 use more bits on the encoder side michael parents: 5572 diff changeset	3962 }
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	3963 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3964
2164 cbac56a6244f cleanup michael parents: 2161 diff changeset	3965 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	3966
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3967 if(s->pass1_rc && plane_index==0){
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3968 int delta_qlog = ratecontrol_1pass(s, pict);
4011 5bce97c30a69 -1 is a valid return value in ratecontrol_1pass() -> 100l for takis michael parents: 4001 diff changeset	3969 if (delta_qlog <= INT_MIN)
3766 acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	3970 return -1;
3661 b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3971 if(delta_qlog){
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3972 //reordering qlog in the bitstream would eliminate this reset
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3973 ff_init_range_encoder(c, buf, buf_size);
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3974 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3975 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3976 encode_header(s);
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3977 encode_blocks(s, 0);
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3978 }
b4425339894b fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same. lorenm parents: 3556 diff changeset	3979 }
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	3980
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3981 for(level=0; level<s->spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3982 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3983 SubBand *b= &p->band[level][orientation];
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	3984
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3985 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3986 if(orientation==0)
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3987 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3988 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3989 assert(b->parent==NULL || b->parent->stride == b->stride*2);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3990 if(orientation==0)
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	3991 correlate(s, b, b->ibuf, b->stride, 1, 0);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3992 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3993 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3994 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3995
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3996 for(level=0; level<s->spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3997 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3998 SubBand *b= &p->band[level][orientation];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	3999
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4000 dequantize(s, b, b->ibuf, b->stride);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4001 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4002 }
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	4003
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4004 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	4005 if(s->qlog == LOSSLESS_QLOG){
ec7789e19e43 lossless support michael parents: 2160 diff changeset	4006 for(y=0; y<h; y++){
ec7789e19e43 lossless support michael parents: 2160 diff changeset	4007 for(x=0; x<w; x++){
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4008 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	4009 }
ec7789e19e43 lossless support michael parents: 2160 diff changeset	4010 }
ec7789e19e43 lossless support michael parents: 2160 diff changeset	4011 }
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	4012 {START_TIMER
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4013 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	4014 STOP_TIMER("pred-conv")}
3338 937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4015 }else{
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4016 //ME/MC only
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4017 if(pict->pict_type == I_TYPE){
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4018 for(y=0; y<h; y++){
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4019 for(x=0; x<w; x++){
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4020 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4021 pict->data[plane_index][y*pict->linesize[plane_index] + x];
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4022 }
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4023 }
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4024 }else{
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4025 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4026 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3338 937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4027 }
937f14bb0f23 support doing motion estimation and compensation without any residual transform or coding michael parents: 3327 diff changeset	4028 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4029 if(s->avctx->flags&CODEC_FLAG_PSNR){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4030 int64_t error= 0;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4031
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4032 if(pict->data[plane_index]) //FIXME gray hack
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4033 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4034 for(x=0; x<w; x++){
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	4035 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4036 error += d*d;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4037 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4038 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4039 s->avctx->error[plane_index] += error;
2232 960e3552e418 per picture psnr michael parents: 2224 diff changeset	4040 s->current_picture.error[plane_index] = error;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4041 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4042 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4043
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4044 if(s->last_picture[s->max_ref_frames-1].data[0])
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4045 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4046
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	4047 s->current_picture.coded_picture_number = avctx->frame_number;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	4048 s->current_picture.pict_type = pict->pict_type;
4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	4049 s->current_picture.quality = pict->quality;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4050 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4051 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4052 s->m.current_picture.display_picture_number =
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4053 s->m.current_picture.coded_picture_number = avctx->frame_number;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4054 s->m.current_picture.quality = pict->quality;
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4055 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4056 if(s->pass1_rc)
3766 acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	4057 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
acf9ca729bd2 Handle possible failure of ff_eval. takis parents: 3662 diff changeset	4058 return -1;
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4059 if(avctx->flags&CODEC_FLAG_PASS1)
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	4060 ff_write_pass1_stats(&s->m);
3313 be941215e8e6 Snow 1pass ratecontrol lorenm parents: 3303 diff changeset	4061 s->m.last_pict_type = s->m.pict_type;
4123 5c86acb39889 outputs bit spent on various encoding functions (motion vectors, overhead, etc) gpoirier parents: 4122 diff changeset	4062 avctx->frame_bits = s->m.frame_bits;
5c86acb39889 outputs bit spent on various encoding functions (motion vectors, overhead, etc) gpoirier parents: 4122 diff changeset	4063 avctx->mv_bits = s->m.mv_bits;
5c86acb39889 outputs bit spent on various encoding functions (motion vectors, overhead, etc) gpoirier parents: 4122 diff changeset	4064 avctx->misc_bits = s->m.misc_bits;
5c86acb39889 outputs bit spent on various encoding functions (motion vectors, overhead, etc) gpoirier parents: 4122 diff changeset	4065 avctx->p_tex_bits = s->m.p_tex_bits;
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	4066
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4067 emms_c();
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4068
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4069 return ff_rac_terminate(c);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4070 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4071
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4072 static void common_end(SnowContext *s){
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4073 int plane_index, level, orientation, i;
2192 77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4074
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4075 av_freep(&s->spatial_dwt_buffer);
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4076 av_freep(&s->spatial_idwt_buffer);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4077
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4078 av_freep(&s->m.me.scratchpad);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4079 av_freep(&s->m.me.map);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4080 av_freep(&s->m.me.score_map);
3033 e8599ab02b38 faster iterative_me: avoid duplicate mc of neighboring blocks. lorenm parents: 3020 diff changeset	4081 av_freep(&s->m.obmc_scratchpad);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4082
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	4083 av_freep(&s->block);
2192 77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4084
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4085 for(i=0; i<MAX_REF_FRAMES; i++){
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4086 av_freep(&s->ref_mvs[i]);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4087 av_freep(&s->ref_scores[i]);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4088 if(s->last_picture[i].data[0])
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4089 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4090 }
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4091
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4092 for(plane_index=0; plane_index<3; plane_index++){
2192 77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4093 for(level=s->spatial_decomposition_count-1; level>=0; level--){
77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4094 for(orientation=level ? 1 : 0; orientation<4; orientation++){
77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4095 SubBand *b= &s->plane[plane_index].band[level][orientation];
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4096
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4097 av_freep(&b->x_coeff);
2192 77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4098 }
77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4099 }
77167ed8453f 46% faster decode_subband() michael parents: 2191 diff changeset	4100 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4101 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4102
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4103 static int encode_end(AVCodecContext *avctx)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4104 {
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4105 SnowContext *s = avctx->priv_data;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4106
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4107 common_end(s);
2608 4fb7fa34050b allow 2pass ratecontrol. also fixes psnr displayed by mencoder. lorenm parents: 2607 diff changeset	4108 av_free(avctx->stats_out);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4109
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4110 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4111 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4112
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4113 static int decode_init(AVCodecContext *avctx)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4114 {
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4115 SnowContext *s = avctx->priv_data;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4116 int block_size;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4117
2635 eaa57c3336fc PIX_FMT_NONE and related fixes michael parents: 2634 diff changeset	4118 avctx->pix_fmt= PIX_FMT_YUV420P;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4119
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4120 common_init(avctx);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4121
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4122 block_size = MB_SIZE >> s->block_max_depth;
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4123 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_idwt_buffer);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4124
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4125 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4126 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4127
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4128 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4129 SnowContext *s = avctx->priv_data;
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4130 RangeCoder * const c= &s->c;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4131 int bytes_read;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4132 AVFrame *picture = data;
2198 970c2de19b2e cleanup michael parents: 2197 diff changeset	4133 int level, orientation, plane_index;
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4134
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4135 ff_init_range_decoder(c, buf, buf_size);
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4136 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4137
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4138 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4139 decode_header(s);
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	4140 if(!s->block) alloc_blocks(s);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4141
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4142 frame_start(s);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4143 //keyframe flag duplication mess FIXME
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4144 if(avctx->debug&FF_DEBUG_PICT_INFO)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4145 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4146
2189 70b27300a496 quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily) michael parents: 2187 diff changeset	4147 decode_blocks(s);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4148
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4149 for(plane_index=0; plane_index<3; plane_index++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4150 Plane *p= &s->plane[plane_index];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4151 int w= p->width;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4152 int h= p->height;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4153 int x, y;
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4154 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4155
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4156 if(s->avctx->debug&2048){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4157 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4158 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4159
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4160 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4161 for(x=0; x<w; x++){
2249 2b1a5e1fd449 merge predict_plane() with DWTELEM->8bit conversation (21% faster) michael parents: 2246 diff changeset	4162 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4163 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4164 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4165 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4166 }
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4167
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4168 { START_TIMER
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4169 for(level=0; level<s->spatial_decomposition_count; level++){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4170 for(orientation=level ? 1 : 0; orientation<4; orientation++){
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4171 SubBand *b= &p->band[level][orientation];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4172 unpack_coeffs(s, b, b->parent, orientation);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4173 }
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4174 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4175 STOP_TIMER("unpack coeffs");
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4176 }
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4177
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4178 {START_TIMER
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4179 const int mb_h= s->b_height << s->block_max_depth;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4180 const int block_size = MB_SIZE >> s->block_max_depth;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4181 const int block_w = plane_index ? block_size/2 : block_size;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4182 int mb_y;
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4183 dwt_compose_t cs[MAX_DECOMPOSITIONS];
d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4184 int yd=0, yq=0;
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4185 int y;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4186 int end_y;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4187
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4188 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4189 for(mb_y=0; mb_y<=mb_h; mb_y++){
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4190
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4191 int slice_starty = block_w*mb_y;
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4192 int slice_h = block_w*(mb_y+1);
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4193 if (!(s->keyframe || s->avctx->debug&512)){
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4194 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4195 slice_h -= (block_w >> 1);
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4196 }
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4197
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4198 {
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4199 START_TIMER
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4200 for(level=0; level<s->spatial_decomposition_count; level++){
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4201 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4202 SubBand *b= &p->band[level][orientation];
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4203 int start_y;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4204 int end_y;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4205 int our_mb_start = mb_y;
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4206 int our_mb_end = (mb_y + 1);
3012 088920c095fc cleanup michael parents: 3000 diff changeset	4207 const int extra= 3;
088920c095fc cleanup michael parents: 3000 diff changeset	4208 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
088920c095fc cleanup michael parents: 3000 diff changeset	4209 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4210 if (!(s->keyframe || s->avctx->debug&512)){
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4211 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4212 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4213 }
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4214 start_y = FFMIN(b->height, start_y);
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4215 end_y = FFMIN(b->height, end_y);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4216
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4217 if (start_y != end_y){
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4218 if (orientation == 0){
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4219 SubBand * correlate_band = &p->band[0][0];
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4220 int correlate_end_y = FFMIN(b->height, end_y + 1);
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4221 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4222 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4223 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4224 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4225 }
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4226 else
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4227 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4228 }
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4229 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4230 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4231 STOP_TIMER("decode_subband_slice");
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4232 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4233
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4234 { START_TIMER
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4235 for(; yd<slice_h; yd+=4){
3198 6b9f0c4fbdbe First part of a series of speed-enchancing patches. gpoirier parents: 3197 diff changeset	4236 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4237 }
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4238 STOP_TIMER("idwt slice");}
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4239
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4240
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	4241 if(s->qlog == LOSSLESS_QLOG){
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4242 for(; yq<slice_h && yq<h; yq++){
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4243 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	4244 for(x=0; x<w; x++){
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4245 line[x] <<= FRAC_BITS;
2161 ec7789e19e43 lossless support michael parents: 2160 diff changeset	4246 }
ec7789e19e43 lossless support michael parents: 2160 diff changeset	4247 }
ec7789e19e43 lossless support michael parents: 2160 diff changeset	4248 }
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4249
5587 3ae03eacbe9f use 16bit IDWT (a SIMD implementation of it should be >2x faster then with michael parents: 5575 diff changeset	4250 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4251
2634 337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4252 y = FFMIN(p->height, slice_starty);
337217ecbb3e tighter snow slicing patch by (Yartrebo \|\| yartrebo earthlink net) michael parents: 2628 diff changeset	4253 end_y = FFMIN(p->height, slice_h);
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4254 while(y < end_y)
a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4255 slice_buffer_release(&s->sb, y++);
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4256 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4257
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4258 slice_buffer_flush(&s->sb);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4259
2562 d0a58dca5ad2 slice-based idwt (10% faster decoding) lorenm parents: 2521 diff changeset	4260 STOP_TIMER("idwt + predict_slices")}
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4261 }
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4262
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4263 emms_c();
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4264
3314 aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4265 if(s->last_picture[s->max_ref_frames-1].data[0])
aea2230e6033 Snow multiple reference frames lorenm parents: 3313 diff changeset	4266 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4267
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4268 if(!(s->avctx->debug&2048))
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4269 *picture= s->current_picture;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4270 else
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4271 *picture= s->mconly_picture;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4272
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4273 *data_size = sizeof(AVFrame);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4274
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4275 bytes_read= c->bytestream - c->bytestream_start;
28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4276 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4277
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4278 return bytes_read;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4279 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4280
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4281 static int decode_end(AVCodecContext *avctx)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4282 {
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4283 SnowContext *s = avctx->priv_data;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4284
2589 a5a62827f195 Snow Slicing patch by (Yartrebo) yartrebo earthlink net michael parents: 2562 diff changeset	4285 slice_buffer_destroy(&s->sb);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4286
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4287 common_end(s);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4288
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4289 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4290 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4291
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4292 AVCodec snow_decoder = {
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4293 "snow",
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4294 CODEC_TYPE_VIDEO,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4295 CODEC_ID_SNOW,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4296 sizeof(SnowContext),
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4297 decode_init,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4298 NULL,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4299 decode_end,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4300 decode_frame,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4301 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4302 NULL
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4303 };
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4304
5224 6ced4aabe225 include snow encoder only if it is not disabled aurel parents: 5127 diff changeset	4305 #ifdef CONFIG_SNOW_ENCODER
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4306 AVCodec snow_encoder = {
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4307 "snow",
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4308 CODEC_TYPE_VIDEO,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4309 CODEC_ID_SNOW,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4310 sizeof(SnowContext),
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4311 encode_init,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4312 encode_frame,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4313 encode_end,
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4314 };
2408 a6e4da1c28ee Disable encoders patch by (Gianluigi Tiesi <mplayer netfarm it>) michael parents: 2368 diff changeset	4315 #endif
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4316
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4317
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4318 #if 0
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4319 #undef malloc
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4320 #undef free
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4321 #undef printf
5380 389366aa3458 Fix the self tests which are contained in some codecs and are using random(). takis parents: 5254 diff changeset	4322 #undef random
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4323
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4324 int main(){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4325 int width=256;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4326 int height=256;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4327 int buffer[2][width*height];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4328 SnowContext s;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4329 int i;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4330 s.spatial_decomposition_count=6;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4331 s.spatial_decomposition_type=1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4332
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4333 printf("testing 5/3 DWT\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4334 for(i=0; i<width*height; i++)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4335 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4336
2951 98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4337 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4338 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4339
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4340 for(i=0; i<width*height; i++)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4341 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4342
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4343 printf("testing 9/7 DWT\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4344 s.spatial_decomposition_type=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4345 for(i=0; i<width*height; i++)
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4346 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4347
2951 98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4348 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4349 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4350
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4351 for(i=0; i<width*height; i++)
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	4352 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4353
2951 98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4354 #if 0
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4355 printf("testing AC coder\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4356 memset(s.header_state, 0, sizeof(s.header_state));
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4357 ff_init_range_encoder(&s.c, buffer[0], 256*256);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4358 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4359
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4360 for(i=-256; i<256; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4361 START_TIMER
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	4362 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4363 STOP_TIMER("put_symbol")
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4364 }
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4365 ff_rac_terminate(&s.c);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4366
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4367 memset(s.header_state, 0, sizeof(s.header_state));
2335 28eb7b1dcefc CABAC -> range coder michael parents: 2252 diff changeset	4368 ff_init_range_decoder(&s.c, buffer[0], 256*256);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4369 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4370
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4371 for(i=-256; i<256; i++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4372 int j;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4373 START_TIMER
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4374 j= get_symbol(&s.c, s.header_state, 1);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4375 STOP_TIMER("get_symbol")
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	4376 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4377 }
2951 98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4378 #endif
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4379 {
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4380 int level, orientation, x, y;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4381 int64_t errors[8][4];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4382 int64_t g=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4383
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4384 memset(errors, 0, sizeof(errors));
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4385 s.spatial_decomposition_count=3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4386 s.spatial_decomposition_type=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4387 for(level=0; level<s.spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4388 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4389 int w= width >> (s.spatial_decomposition_count-level);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4390 int h= height >> (s.spatial_decomposition_count-level);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4391 int stride= width << (s.spatial_decomposition_count-level);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4392 DWTELEM *buf= buffer[0];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4393 int64_t error=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4394
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4395 if(orientation&1) buf+=w;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4396 if(orientation>1) buf+=stride>>1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4397
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4398 memset(buffer[0], 0, sizeof(int)*width*height);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4399 buf[w/2 + h/2*stride]= 256*256;
2951 98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4400 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4401 for(y=0; y<height; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4402 for(x=0; x<width; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4403 int64_t d= buffer[0][x + y*width];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4404 error += d*d;
4122 daae66c03857 Replace most of the %lld and %llx by their (cleaner) PRI64 counterparts. diego* parents: 4011 diff changeset	4405 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4406 }
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	4407 if(FFABS(height/2-y)<9 && level==2) printf("\n");
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4408 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4409 error= (int)(sqrt(error)+0.5);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4410 errors[level][orientation]= error;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4411 if(g) g=ff_gcd(g, error);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4412 else g= error;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4413 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4414 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4415 printf("static int const visual_weight[][4]={\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4416 for(level=0; level<s.spatial_decomposition_count; level++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4417 printf(" {");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4418 for(orientation=0; orientation<4; orientation++){
4122 daae66c03857 Replace most of the %lld and %llx by their (cleaner) PRI64 counterparts. diego* parents: 4011 diff changeset	4419 printf("%8"PRId64",", errors[level][orientation]/g);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4420 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4421 printf("},\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4422 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4423 printf("};\n");
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4424 {
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4425 int level=2;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4426 int orientation=3;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4427 int w= width >> (s.spatial_decomposition_count-level);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4428 int h= height >> (s.spatial_decomposition_count-level);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4429 int stride= width << (s.spatial_decomposition_count-level);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4430 DWTELEM *buf= buffer[0];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4431 int64_t error=0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4432
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4433 buf+=w;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4434 buf+=stride>>1;
2967 ef2149182f1c COSMETICS: Remove all trailing whitespace. diego parents: 2952 diff changeset	4435
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4436 memset(buffer[0], 0, sizeof(int)*width*height);
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4437 #if 1
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4438 for(y=0; y<height; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4439 for(x=0; x<width; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4440 int tab[4]={0,2,3,1};
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4441 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4442 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4443 }
2951 98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4444 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4445 #else
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4446 for(y=0; y<h; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4447 for(x=0; x<w; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4448 buf[x + y*stride ]=169;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4449 buf[x + y*stride-w]=64;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4450 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4451 }
2951 98a2bd9c8674 make selftest code compile again michael parents: 2893 diff changeset	4452 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4453 #endif
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4454 for(y=0; y<height; y++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4455 for(x=0; x<width; x++){
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4456 int64_t d= buffer[0][x + y*width];
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4457 error += d*d;
4122 daae66c03857 Replace most of the %lld and %llx by their (cleaner) PRI64 counterparts. diego* parents: 4011 diff changeset	4458 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4459 }
4001 34fdffe98bd0 Rename ABS macro to FFABS. diego parents: 3947 diff changeset	4460 if(FFABS(height/2-y)<9) printf("\n");
2138 1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4461 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4462 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4463
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4464 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4465 return 0;
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4466 }
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4467 #endif
1ac13c350369 my experimental wavelet codec michael parents: diff changeset	4468

Mercurial > libavcodec.hg

annotate snow.c @ 5596:051caa9c1ba5 libavcodec