Mercurial > libavcodec.hg
comparison jrevdct.c @ 2263:258f21820108 libavcodec
porting optimizations from 4x4 dct to 8x8
author | michael |
---|---|
date | Sun, 26 Sep 2004 17:36:53 +0000 |
parents | 7a1c3178d759 |
children | ef2149182f1c |
comparison
equal
deleted
inserted
replaced
2262:7a1c3178d759 | 2263:258f21820108 |
---|---|
233 | 233 |
234 /* Even part: reverse the even part of the forward DCT. */ | 234 /* Even part: reverse the even part of the forward DCT. */ |
235 /* The rotator is sqrt(2)*c(-6). */ | 235 /* The rotator is sqrt(2)*c(-6). */ |
236 { | 236 { |
237 if (d6) { | 237 if (d6) { |
238 if (d4) { | |
239 if (d2) { | 238 if (d2) { |
240 if (d0) { | |
241 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | 239 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ |
242 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | 240 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
243 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | 241 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
244 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | 242 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
245 | 243 |
248 | 246 |
249 tmp10 = tmp0 + tmp3; | 247 tmp10 = tmp0 + tmp3; |
250 tmp13 = tmp0 - tmp3; | 248 tmp13 = tmp0 - tmp3; |
251 tmp11 = tmp1 + tmp2; | 249 tmp11 = tmp1 + tmp2; |
252 tmp12 = tmp1 - tmp2; | 250 tmp12 = tmp1 - tmp2; |
253 } else { | 251 } else { |
254 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ | |
255 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
256 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
257 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
258 | |
259 tmp0 = d4 << CONST_BITS; | |
260 | |
261 tmp10 = tmp0 + tmp3; | |
262 tmp13 = tmp0 - tmp3; | |
263 tmp11 = tmp2 - tmp0; | |
264 tmp12 = -(tmp0 + tmp2); | |
265 } | |
266 } else { | |
267 if (d0) { | |
268 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | 252 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ |
269 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | 253 tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
270 tmp3 = MULTIPLY(d6, FIX_0_541196100); | 254 tmp3 = MULTIPLY(d6, FIX_0_541196100); |
271 | 255 |
272 tmp0 = (d0 + d4) << CONST_BITS; | 256 tmp0 = (d0 + d4) << CONST_BITS; |
274 | 258 |
275 tmp10 = tmp0 + tmp3; | 259 tmp10 = tmp0 + tmp3; |
276 tmp13 = tmp0 - tmp3; | 260 tmp13 = tmp0 - tmp3; |
277 tmp11 = tmp1 + tmp2; | 261 tmp11 = tmp1 + tmp2; |
278 tmp12 = tmp1 - tmp2; | 262 tmp12 = tmp1 - tmp2; |
279 } else { | 263 } |
280 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ | 264 } else { |
281 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
282 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
283 | |
284 tmp0 = d4 << CONST_BITS; | |
285 | |
286 tmp10 = tmp0 + tmp3; | |
287 tmp13 = tmp0 - tmp3; | |
288 tmp11 = tmp2 - tmp0; | |
289 tmp12 = -(tmp0 + tmp2); | |
290 } | |
291 } | |
292 } else { | |
293 if (d2) { | 265 if (d2) { |
294 if (d0) { | |
295 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ | |
296 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
297 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
298 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
299 | |
300 tmp0 = d0 << CONST_BITS; | |
301 | |
302 tmp10 = tmp0 + tmp3; | |
303 tmp13 = tmp0 - tmp3; | |
304 tmp11 = tmp0 + tmp2; | |
305 tmp12 = tmp0 - tmp2; | |
306 } else { | |
307 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ | |
308 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
309 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
310 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
311 | |
312 tmp10 = tmp3; | |
313 tmp13 = -tmp3; | |
314 tmp11 = tmp2; | |
315 tmp12 = -tmp2; | |
316 } | |
317 } else { | |
318 if (d0) { | |
319 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ | |
320 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
321 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
322 | |
323 tmp0 = d0 << CONST_BITS; | |
324 | |
325 tmp10 = tmp0 + tmp3; | |
326 tmp13 = tmp0 - tmp3; | |
327 tmp11 = tmp0 + tmp2; | |
328 tmp12 = tmp0 - tmp2; | |
329 } else { | |
330 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ | |
331 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
332 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
333 | |
334 tmp10 = tmp3; | |
335 tmp13 = -tmp3; | |
336 tmp11 = tmp2; | |
337 tmp12 = -tmp2; | |
338 } | |
339 } | |
340 } | |
341 } else { | |
342 if (d4) { | |
343 if (d2) { | |
344 if (d0) { | |
345 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | 266 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ |
346 tmp2 = MULTIPLY(d2, FIX_0_541196100); | 267 tmp2 = MULTIPLY(d2, FIX_0_541196100); |
347 tmp3 = MULTIPLY(d2, FIX_1_306562965); | 268 tmp3 = MULTIPLY(d2, FIX_1_306562965); |
348 | 269 |
349 tmp0 = (d0 + d4) << CONST_BITS; | 270 tmp0 = (d0 + d4) << CONST_BITS; |
351 | 272 |
352 tmp10 = tmp0 + tmp3; | 273 tmp10 = tmp0 + tmp3; |
353 tmp13 = tmp0 - tmp3; | 274 tmp13 = tmp0 - tmp3; |
354 tmp11 = tmp1 + tmp2; | 275 tmp11 = tmp1 + tmp2; |
355 tmp12 = tmp1 - tmp2; | 276 tmp12 = tmp1 - tmp2; |
356 } else { | 277 } else { |
357 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ | |
358 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
359 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
360 | |
361 tmp0 = d4 << CONST_BITS; | |
362 | |
363 tmp10 = tmp0 + tmp3; | |
364 tmp13 = tmp0 - tmp3; | |
365 tmp11 = tmp2 - tmp0; | |
366 tmp12 = -(tmp0 + tmp2); | |
367 } | |
368 } else { | |
369 if (d0) { | |
370 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | 278 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ |
371 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | 279 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
372 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | 280 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
373 } else { | 281 } |
374 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ | |
375 tmp10 = tmp13 = d4 << CONST_BITS; | |
376 tmp11 = tmp12 = -tmp10; | |
377 } | |
378 } | |
379 } else { | |
380 if (d2) { | |
381 if (d0) { | |
382 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ | |
383 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
384 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
385 | |
386 tmp0 = d0 << CONST_BITS; | |
387 | |
388 tmp10 = tmp0 + tmp3; | |
389 tmp13 = tmp0 - tmp3; | |
390 tmp11 = tmp0 + tmp2; | |
391 tmp12 = tmp0 - tmp2; | |
392 } else { | |
393 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ | |
394 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
395 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
396 | |
397 tmp10 = tmp3; | |
398 tmp13 = -tmp3; | |
399 tmp11 = tmp2; | |
400 tmp12 = -tmp2; | |
401 } | |
402 } else { | |
403 if (d0) { | |
404 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ | |
405 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; | |
406 } else { | |
407 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ | |
408 tmp10 = tmp13 = tmp11 = tmp12 = 0; | |
409 } | |
410 } | |
411 } | |
412 } | 282 } |
413 | 283 |
414 /* Odd part per figure 8; the matrix is unitary and hence its | 284 /* Odd part per figure 8; the matrix is unitary and hence its |
415 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | 285 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
416 */ | 286 */ |
709 d7 = dataptr[DCTSIZE*7]; | 579 d7 = dataptr[DCTSIZE*7]; |
710 | 580 |
711 /* Even part: reverse the even part of the forward DCT. */ | 581 /* Even part: reverse the even part of the forward DCT. */ |
712 /* The rotator is sqrt(2)*c(-6). */ | 582 /* The rotator is sqrt(2)*c(-6). */ |
713 if (d6) { | 583 if (d6) { |
714 if (d4) { | |
715 if (d2) { | 584 if (d2) { |
716 if (d0) { | |
717 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ | 585 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ |
718 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | 586 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); |
719 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | 587 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); |
720 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | 588 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); |
721 | 589 |
724 | 592 |
725 tmp10 = tmp0 + tmp3; | 593 tmp10 = tmp0 + tmp3; |
726 tmp13 = tmp0 - tmp3; | 594 tmp13 = tmp0 - tmp3; |
727 tmp11 = tmp1 + tmp2; | 595 tmp11 = tmp1 + tmp2; |
728 tmp12 = tmp1 - tmp2; | 596 tmp12 = tmp1 - tmp2; |
729 } else { | 597 } else { |
730 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ | |
731 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
732 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
733 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
734 | |
735 tmp0 = d4 << CONST_BITS; | |
736 | |
737 tmp10 = tmp0 + tmp3; | |
738 tmp13 = tmp0 - tmp3; | |
739 tmp11 = tmp2 - tmp0; | |
740 tmp12 = -(tmp0 + tmp2); | |
741 } | |
742 } else { | |
743 if (d0) { | |
744 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ | 598 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ |
745 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | 599 tmp2 = MULTIPLY(-d6, FIX_1_306562965); |
746 tmp3 = MULTIPLY(d6, FIX_0_541196100); | 600 tmp3 = MULTIPLY(d6, FIX_0_541196100); |
747 | 601 |
748 tmp0 = (d0 + d4) << CONST_BITS; | 602 tmp0 = (d0 + d4) << CONST_BITS; |
750 | 604 |
751 tmp10 = tmp0 + tmp3; | 605 tmp10 = tmp0 + tmp3; |
752 tmp13 = tmp0 - tmp3; | 606 tmp13 = tmp0 - tmp3; |
753 tmp11 = tmp1 + tmp2; | 607 tmp11 = tmp1 + tmp2; |
754 tmp12 = tmp1 - tmp2; | 608 tmp12 = tmp1 - tmp2; |
755 } else { | 609 } |
756 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ | 610 } else { |
757 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
758 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
759 | |
760 tmp0 = d4 << CONST_BITS; | |
761 | |
762 tmp10 = tmp0 + tmp3; | |
763 tmp13 = tmp0 - tmp3; | |
764 tmp11 = tmp2 - tmp0; | |
765 tmp12 = -(tmp0 + tmp2); | |
766 } | |
767 } | |
768 } else { | |
769 if (d2) { | 611 if (d2) { |
770 if (d0) { | |
771 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ | |
772 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
773 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
774 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
775 | |
776 tmp0 = d0 << CONST_BITS; | |
777 | |
778 tmp10 = tmp0 + tmp3; | |
779 tmp13 = tmp0 - tmp3; | |
780 tmp11 = tmp0 + tmp2; | |
781 tmp12 = tmp0 - tmp2; | |
782 } else { | |
783 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ | |
784 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); | |
785 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); | |
786 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); | |
787 | |
788 tmp10 = tmp3; | |
789 tmp13 = -tmp3; | |
790 tmp11 = tmp2; | |
791 tmp12 = -tmp2; | |
792 } | |
793 } else { | |
794 if (d0) { | |
795 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ | |
796 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
797 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
798 | |
799 tmp0 = d0 << CONST_BITS; | |
800 | |
801 tmp10 = tmp0 + tmp3; | |
802 tmp13 = tmp0 - tmp3; | |
803 tmp11 = tmp0 + tmp2; | |
804 tmp12 = tmp0 - tmp2; | |
805 } else { | |
806 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ | |
807 tmp2 = MULTIPLY(-d6, FIX_1_306562965); | |
808 tmp3 = MULTIPLY(d6, FIX_0_541196100); | |
809 | |
810 tmp10 = tmp3; | |
811 tmp13 = -tmp3; | |
812 tmp11 = tmp2; | |
813 tmp12 = -tmp2; | |
814 } | |
815 } | |
816 } | |
817 } else { | |
818 if (d4) { | |
819 if (d2) { | |
820 if (d0) { | |
821 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ | 612 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ |
822 tmp2 = MULTIPLY(d2, FIX_0_541196100); | 613 tmp2 = MULTIPLY(d2, FIX_0_541196100); |
823 tmp3 = MULTIPLY(d2, FIX_1_306562965); | 614 tmp3 = MULTIPLY(d2, FIX_1_306562965); |
824 | 615 |
825 tmp0 = (d0 + d4) << CONST_BITS; | 616 tmp0 = (d0 + d4) << CONST_BITS; |
827 | 618 |
828 tmp10 = tmp0 + tmp3; | 619 tmp10 = tmp0 + tmp3; |
829 tmp13 = tmp0 - tmp3; | 620 tmp13 = tmp0 - tmp3; |
830 tmp11 = tmp1 + tmp2; | 621 tmp11 = tmp1 + tmp2; |
831 tmp12 = tmp1 - tmp2; | 622 tmp12 = tmp1 - tmp2; |
832 } else { | 623 } else { |
833 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ | |
834 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
835 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
836 | |
837 tmp0 = d4 << CONST_BITS; | |
838 | |
839 tmp10 = tmp0 + tmp3; | |
840 tmp13 = tmp0 - tmp3; | |
841 tmp11 = tmp2 - tmp0; | |
842 tmp12 = -(tmp0 + tmp2); | |
843 } | |
844 } else { | |
845 if (d0) { | |
846 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ | 624 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ |
847 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; | 625 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; |
848 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; | 626 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; |
849 } else { | 627 } |
850 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ | |
851 tmp10 = tmp13 = d4 << CONST_BITS; | |
852 tmp11 = tmp12 = -tmp10; | |
853 } | |
854 } | |
855 } else { | |
856 if (d2) { | |
857 if (d0) { | |
858 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ | |
859 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
860 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
861 | |
862 tmp0 = d0 << CONST_BITS; | |
863 | |
864 tmp10 = tmp0 + tmp3; | |
865 tmp13 = tmp0 - tmp3; | |
866 tmp11 = tmp0 + tmp2; | |
867 tmp12 = tmp0 - tmp2; | |
868 } else { | |
869 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ | |
870 tmp2 = MULTIPLY(d2, FIX_0_541196100); | |
871 tmp3 = MULTIPLY(d2, FIX_1_306562965); | |
872 | |
873 tmp10 = tmp3; | |
874 tmp13 = -tmp3; | |
875 tmp11 = tmp2; | |
876 tmp12 = -tmp2; | |
877 } | |
878 } else { | |
879 if (d0) { | |
880 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ | |
881 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; | |
882 } else { | |
883 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ | |
884 tmp10 = tmp13 = tmp11 = tmp12 = 0; | |
885 } | |
886 } | |
887 } | |
888 } | 628 } |
889 | 629 |
890 /* Odd part per figure 8; the matrix is unitary and hence its | 630 /* Odd part per figure 8; the matrix is unitary and hence its |
891 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. | 631 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. |
892 */ | 632 */ |