Mercurial > libavcodec.hg
comparison vp3.c @ 2722:b552196f4123 libavcodec
render by the slice rather than the plane
author | melanson |
---|---|
date | Sat, 21 May 2005 04:43:36 +0000 |
parents | 7a6db2f4e6c0 |
children | 5ae980827158 |
comparison
equal
deleted
inserted
replaced
2721:7a6db2f4e6c0 | 2722:b552196f4123 |
---|---|
2142 } | 2142 } |
2143 } | 2143 } |
2144 } | 2144 } |
2145 | 2145 |
2146 /* | 2146 /* |
2147 * Perform the final rendering for a particular slice of data. | |
2148 * The slice number ranges from 0..(macroblock_height - 1). | |
2149 */ | |
2150 static void render_slice(Vp3DecodeContext *s, int slice) | |
2151 { | |
2152 int x, y; | |
2153 int m, n; | |
2154 int i; /* indicates current fragment */ | |
2155 int16_t *dequantizer; | |
2156 DCTELEM __align16 block[64]; | |
2157 unsigned char *output_plane; | |
2158 unsigned char *last_plane; | |
2159 unsigned char *golden_plane; | |
2160 int stride; | |
2161 int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef; | |
2162 int upper_motion_limit, lower_motion_limit; | |
2163 int motion_halfpel_index; | |
2164 uint8_t *motion_source; | |
2165 int plane; | |
2166 int plane_width; | |
2167 int plane_height; | |
2168 int slice_height; | |
2169 int current_macroblock_entry = slice * s->macroblock_width * 6; | |
2170 | |
2171 if (slice >= s->macroblock_height) | |
2172 return; | |
2173 | |
2174 for (plane = 0; plane < 3; plane++) { | |
2175 | |
2176 /* set up plane-specific parameters */ | |
2177 if (plane == 0) { | |
2178 output_plane = s->current_frame.data[0]; | |
2179 last_plane = s->last_frame.data[0]; | |
2180 golden_plane = s->golden_frame.data[0]; | |
2181 stride = s->current_frame.linesize[0]; | |
2182 if (!s->flipped_image) stride = -stride; | |
2183 upper_motion_limit = 7 * s->current_frame.linesize[0]; | |
2184 lower_motion_limit = s->height * s->current_frame.linesize[0] + s->width - 8; | |
2185 y = slice * FRAGMENT_PIXELS * 2; | |
2186 plane_width = s->width; | |
2187 plane_height = s->height; | |
2188 slice_height = y + FRAGMENT_PIXELS * 2; | |
2189 i = s->macroblock_fragments[current_macroblock_entry + 0]; | |
2190 } else if (plane == 1) { | |
2191 output_plane = s->current_frame.data[1]; | |
2192 last_plane = s->last_frame.data[1]; | |
2193 golden_plane = s->golden_frame.data[1]; | |
2194 stride = s->current_frame.linesize[1]; | |
2195 if (!s->flipped_image) stride = -stride; | |
2196 upper_motion_limit = 7 * s->current_frame.linesize[1]; | |
2197 lower_motion_limit = (s->height / 2) * s->current_frame.linesize[1] + (s->width / 2) - 8; | |
2198 y = slice * FRAGMENT_PIXELS; | |
2199 plane_width = s->width / 2; | |
2200 plane_height = s->height / 2; | |
2201 slice_height = y + FRAGMENT_PIXELS; | |
2202 i = s->macroblock_fragments[current_macroblock_entry + 4]; | |
2203 } else { | |
2204 output_plane = s->current_frame.data[2]; | |
2205 last_plane = s->last_frame.data[2]; | |
2206 golden_plane = s->golden_frame.data[2]; | |
2207 stride = s->current_frame.linesize[2]; | |
2208 if (!s->flipped_image) stride = -stride; | |
2209 upper_motion_limit = 7 * s->current_frame.linesize[2]; | |
2210 lower_motion_limit = (s->height / 2) * s->current_frame.linesize[2] + (s->width / 2) - 8; | |
2211 y = slice * FRAGMENT_PIXELS; | |
2212 plane_width = s->width / 2; | |
2213 plane_height = s->height / 2; | |
2214 slice_height = y + FRAGMENT_PIXELS; | |
2215 i = s->macroblock_fragments[current_macroblock_entry + 5]; | |
2216 } | |
2217 | |
2218 if(ABS(stride) > 2048) | |
2219 return; //various tables are fixed size | |
2220 | |
2221 /* for each fragment row in the slice (two for the Y plane, one for each chroma plane)... */ | |
2222 for (; y < slice_height; y += 8) { | |
2223 | |
2224 /* for each fragment in a row... */ | |
2225 for (x = 0; x < plane_width; x += 8, i++) { | |
2226 | |
2227 if ((i < 0) || (i >= s->fragment_count)) { | |
2228 av_log(s->avctx, AV_LOG_ERROR, " vp3:render_slice(): bad fragment number (%d)\n", i); | |
2229 return; | |
2230 } | |
2231 | |
2232 /* transform if this block was coded */ | |
2233 if ((s->all_fragments[i].coding_method != MODE_COPY) && | |
2234 !((s->avctx->flags & CODEC_FLAG_GRAY) && plane)) { | |
2235 | |
2236 if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) || | |
2237 (s->all_fragments[i].coding_method == MODE_GOLDEN_MV)) | |
2238 motion_source= golden_plane; | |
2239 else | |
2240 motion_source= last_plane; | |
2241 | |
2242 motion_source += s->all_fragments[i].first_pixel; | |
2243 motion_halfpel_index = 0; | |
2244 | |
2245 /* sort out the motion vector if this fragment is coded | |
2246 * using a motion vector method */ | |
2247 if ((s->all_fragments[i].coding_method > MODE_INTRA) && | |
2248 (s->all_fragments[i].coding_method != MODE_USING_GOLDEN)) { | |
2249 int src_x, src_y; | |
2250 motion_x = s->all_fragments[i].motion_x; | |
2251 motion_y = s->all_fragments[i].motion_y; | |
2252 if(plane){ | |
2253 motion_x= (motion_x>>1) | (motion_x&1); | |
2254 motion_y= (motion_y>>1) | (motion_y&1); | |
2255 } | |
2256 | |
2257 src_x= (motion_x>>1) + x; | |
2258 src_y= (motion_y>>1) + y; | |
2259 if ((motion_x == 127) || (motion_y == 127)) | |
2260 av_log(s->avctx, AV_LOG_ERROR, " help! got invalid motion vector! (%X, %X)\n", motion_x, motion_y); | |
2261 | |
2262 motion_halfpel_index = motion_x & 0x01; | |
2263 motion_source += (motion_x >> 1); | |
2264 | |
2265 motion_halfpel_index |= (motion_y & 0x01) << 1; | |
2266 motion_source += ((motion_y >> 1) * stride); | |
2267 | |
2268 if(src_x<0 || src_y<0 || src_x + 9 >= plane_width || src_y + 9 >= plane_height){ | |
2269 uint8_t *temp= s->edge_emu_buffer; | |
2270 if(stride<0) temp -= 9*stride; | |
2271 else temp += 9*stride; | |
2272 | |
2273 ff_emulated_edge_mc(temp, motion_source, stride, 9, 9, src_x, src_y, plane_width, plane_height); | |
2274 motion_source= temp; | |
2275 } | |
2276 } | |
2277 | |
2278 | |
2279 /* first, take care of copying a block from either the | |
2280 * previous or the golden frame */ | |
2281 if (s->all_fragments[i].coding_method != MODE_INTRA) { | |
2282 /* Note, it is possible to implement all MC cases with | |
2283 put_no_rnd_pixels_l2 which would look more like the | |
2284 VP3 source but this would be slower as | |
2285 put_no_rnd_pixels_tab is better optimized */ | |
2286 if(motion_halfpel_index != 3){ | |
2287 s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( | |
2288 output_plane + s->all_fragments[i].first_pixel, | |
2289 motion_source, stride, 8); | |
2290 }else{ | |
2291 int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 | |
2292 s->dsp.put_no_rnd_pixels_l2[1]( | |
2293 output_plane + s->all_fragments[i].first_pixel, | |
2294 motion_source - d, | |
2295 motion_source + stride + 1 + d, | |
2296 stride, 8); | |
2297 } | |
2298 dequantizer = s->inter_dequant; | |
2299 }else{ | |
2300 if (plane == 0) | |
2301 dequantizer = s->intra_y_dequant; | |
2302 else | |
2303 dequantizer = s->intra_c_dequant; | |
2304 } | |
2305 | |
2306 /* dequantize the DCT coefficients */ | |
2307 debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", | |
2308 i, s->all_fragments[i].coding_method, | |
2309 DC_COEFF(i), dequantizer[0]); | |
2310 | |
2311 if(s->avctx->idct_algo==FF_IDCT_VP3){ | |
2312 Coeff *coeff= s->coeffs + i; | |
2313 memset(block, 0, sizeof(block)); | |
2314 while(coeff->next){ | |
2315 block[coeff->index]= coeff->coeff * dequantizer[coeff->index]; | |
2316 coeff= coeff->next; | |
2317 } | |
2318 }else{ | |
2319 Coeff *coeff= s->coeffs + i; | |
2320 memset(block, 0, sizeof(block)); | |
2321 while(coeff->next){ | |
2322 block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2; | |
2323 coeff= coeff->next; | |
2324 } | |
2325 } | |
2326 | |
2327 /* invert DCT and place (or add) in final output */ | |
2328 | |
2329 if (s->all_fragments[i].coding_method == MODE_INTRA) { | |
2330 if(s->avctx->idct_algo!=FF_IDCT_VP3) | |
2331 block[0] += 128<<3; | |
2332 s->dsp.idct_put( | |
2333 output_plane + s->all_fragments[i].first_pixel, | |
2334 stride, | |
2335 block); | |
2336 } else { | |
2337 s->dsp.idct_add( | |
2338 output_plane + s->all_fragments[i].first_pixel, | |
2339 stride, | |
2340 block); | |
2341 } | |
2342 | |
2343 debug_idct("block after idct_%s():\n", | |
2344 (s->all_fragments[i].coding_method == MODE_INTRA)? | |
2345 "put" : "add"); | |
2346 for (m = 0; m < 8; m++) { | |
2347 for (n = 0; n < 8; n++) { | |
2348 debug_idct(" %3d", *(output_plane + | |
2349 s->all_fragments[i].first_pixel + (m * stride + n))); | |
2350 } | |
2351 debug_idct("\n"); | |
2352 } | |
2353 debug_idct("\n"); | |
2354 | |
2355 } else { | |
2356 | |
2357 /* copy directly from the previous frame */ | |
2358 s->dsp.put_pixels_tab[1][0]( | |
2359 output_plane + s->all_fragments[i].first_pixel, | |
2360 last_plane + s->all_fragments[i].first_pixel, | |
2361 stride, 8); | |
2362 | |
2363 } | |
2364 } | |
2365 } | |
2366 } | |
2367 | |
2368 /* future loop filter logic goes here... */ | |
2369 /* algorithm: | |
2370 * if (slice != 0) | |
2371 * run filter on 1st row of Y slice | |
2372 * run filter on U slice | |
2373 * run filter on V slice | |
2374 * run filter on 2nd row of Y slice | |
2375 */ | |
2376 | |
2377 /* this looks like a good place for slice dispatch... */ | |
2378 /* algorithm: | |
2379 * if (slice > 0) | |
2380 * dispatch (slice - 1); | |
2381 * if (slice == s->macroblock_height - 1) | |
2382 * dispatch (slice); // handle last slice | |
2383 */ | |
2384 | |
2385 emms_c(); | |
2386 } | |
2387 | |
2388 /* | |
2147 * This function performs the final rendering of each fragment's data | 2389 * This function performs the final rendering of each fragment's data |
2148 * onto the output frame. | 2390 * onto the output frame. |
2149 */ | 2391 */ |
2150 static void render_fragments(Vp3DecodeContext *s, | 2392 static void render_fragments(Vp3DecodeContext *s, |
2151 int first_fragment, | 2393 int first_fragment, |
2772 uint8_t *buf, int buf_size) | 3014 uint8_t *buf, int buf_size) |
2773 { | 3015 { |
2774 Vp3DecodeContext *s = avctx->priv_data; | 3016 Vp3DecodeContext *s = avctx->priv_data; |
2775 GetBitContext gb; | 3017 GetBitContext gb; |
2776 static int counter = 0; | 3018 static int counter = 0; |
3019 int i; | |
2777 | 3020 |
2778 init_get_bits(&gb, buf, buf_size * 8); | 3021 init_get_bits(&gb, buf, buf_size * 8); |
2779 | 3022 |
2780 if (s->theora && get_bits1(&gb)) | 3023 if (s->theora && get_bits1(&gb)) |
2781 { | 3024 { |
2915 } | 3158 } |
2916 STOP_TIMER("unpack_dct_coeffs")} | 3159 STOP_TIMER("unpack_dct_coeffs")} |
2917 {START_TIMER | 3160 {START_TIMER |
2918 | 3161 |
2919 reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); | 3162 reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); |
2920 STOP_TIMER("reverse_dc_prediction")} | |
2921 {START_TIMER | |
2922 render_fragments(s, 0, s->width, s->height, 0); | |
2923 STOP_TIMER("render_fragments")} | |
2924 | |
2925 if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { | 3163 if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { |
2926 reverse_dc_prediction(s, s->u_fragment_start, | 3164 reverse_dc_prediction(s, s->u_fragment_start, |
2927 s->fragment_width / 2, s->fragment_height / 2); | 3165 s->fragment_width / 2, s->fragment_height / 2); |
2928 reverse_dc_prediction(s, s->v_fragment_start, | 3166 reverse_dc_prediction(s, s->v_fragment_start, |
2929 s->fragment_width / 2, s->fragment_height / 2); | 3167 s->fragment_width / 2, s->fragment_height / 2); |
3168 } | |
3169 STOP_TIMER("reverse_dc_prediction")} | |
3170 {START_TIMER | |
3171 | |
3172 #if 1 | |
3173 for (i = 0; i < s->macroblock_height; i++) | |
3174 render_slice(s, i); | |
3175 #else | |
3176 render_fragments(s, 0, s->width, s->height, 0); | |
3177 if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { | |
2930 render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1); | 3178 render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1); |
2931 render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2); | 3179 render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2); |
2932 } else { | 3180 } else { |
2933 memset(s->current_frame.data[1], 0x80, s->width * s->height / 4); | 3181 memset(s->current_frame.data[1], 0x80, s->width * s->height / 4); |
2934 memset(s->current_frame.data[2], 0x80, s->width * s->height / 4); | 3182 memset(s->current_frame.data[2], 0x80, s->width * s->height / 4); |
2935 } | 3183 } |
3184 #endif | |
3185 STOP_TIMER("render_fragments")} | |
2936 | 3186 |
2937 {START_TIMER | 3187 {START_TIMER |
2938 apply_loop_filter(s); | 3188 apply_loop_filter(s); |
2939 STOP_TIMER("apply_loop_filter")} | 3189 STOP_TIMER("apply_loop_filter")} |
2940 #if KEYFRAMES_ONLY | 3190 #if KEYFRAMES_ONLY |