comparison: jrevdct.c @ 2263:258f21820108 (libavcodec)

"porting optimizations from 4x4 dct to 8x8"

author:   michael
date:     Sun, 26 Sep 2004 17:36:53 +0000
parents:  7a1c3178d759
children: ef2149182f1c
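Note on conventions: both hunks assume the fixed-point setup defined earlier in jrevdct.c. The names below (CONST_BITS, MULTIPLY, the FIX_* constants) are the ones the diff uses; the macro bodies are a sketch in the usual libjpeg style and should be treated as assumptions, not quotes from the file:

    /* Sketch of the fixed-point conventions assumed by the hunks below;
     * names match jrevdct.c, bodies are the conventional definitions. */
    #define CONST_BITS 13   /* 13 fractional bits in every FIX_* constant */
    #define FIX(x)  ((int32_t) ((x) * (1 << CONST_BITS) + 0.5))

    #define FIX_0_541196100  FIX(0.541196100)   /* sqrt(2)*cos(6*pi/16) */
    #define FIX_0_765366865  FIX(0.765366865)
    #define FIX_1_306562965  FIX(1.306562965)   /* sqrt(2)*sin(6*pi/16) */
    #define FIX_1_847759065  FIX(1.847759065)

    /* plain-C coefficient * scaled-constant multiply */
    #define MULTIPLY(var, const)  ((var) * (const))

This scaling is what the "/* The rotator is sqrt(2)*c(-6). */" comment in both hunks refers to.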
@@ -233,184 +233,54 @@
 
   /* Even part: reverse the even part of the forward DCT. */
   /* The rotator is sqrt(2)*c(-6). */
   {
     if (d6) {
-        if (d4) {
             if (d2) {
-                if (d0) {
                     /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
                     z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
 
                     tmp0 = (d0 + d4) << CONST_BITS;
                     tmp1 = (d0 - d4) << CONST_BITS;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
                     tmp11 = tmp1 + tmp2;
                     tmp12 = tmp1 - tmp2;
                 } else {
-                    /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
-                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-                    tmp0 = d4 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp2 - tmp0;
-                    tmp12 = -(tmp0 + tmp2);
-                }
-            } else {
-                if (d0) {
                     /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
 
                     tmp0 = (d0 + d4) << CONST_BITS;
                     tmp1 = (d0 - d4) << CONST_BITS;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
                     tmp11 = tmp1 + tmp2;
                     tmp12 = tmp1 - tmp2;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
-                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-                    tmp0 = d4 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp2 - tmp0;
-                    tmp12 = -(tmp0 + tmp2);
-                }
-            }
-        } else {
+                }
+    } else {
             if (d2) {
-                if (d0) {
-                    /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
-                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-                    tmp0 = d0 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp0 + tmp2;
-                    tmp12 = tmp0 - tmp2;
-                } else {
-                    /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
-                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-                    tmp10 = tmp3;
-                    tmp13 = -tmp3;
-                    tmp11 = tmp2;
-                    tmp12 = -tmp2;
-                }
-            } else {
-                if (d0) {
-                    /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
-                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-                    tmp0 = d0 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp0 + tmp2;
-                    tmp12 = tmp0 - tmp2;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
-                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-                    tmp10 = tmp3;
-                    tmp13 = -tmp3;
-                    tmp11 = tmp2;
-                    tmp12 = -tmp2;
-                }
-            }
-        }
-    } else {
-        if (d4) {
-            if (d2) {
-                if (d0) {
                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
 
                     tmp0 = (d0 + d4) << CONST_BITS;
                     tmp1 = (d0 - d4) << CONST_BITS;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
                     tmp11 = tmp1 + tmp2;
                     tmp12 = tmp1 - tmp2;
                 } else {
-                    /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
-                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-                    tmp0 = d4 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp2 - tmp0;
-                    tmp12 = -(tmp0 + tmp2);
-                }
-            } else {
-                if (d0) {
                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
-                    tmp10 = tmp13 = d4 << CONST_BITS;
-                    tmp11 = tmp12 = -tmp10;
-                }
-            }
-        } else {
-            if (d2) {
-                if (d0) {
-                    /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
-                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-                    tmp0 = d0 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp0 + tmp2;
-                    tmp12 = tmp0 - tmp2;
-                } else {
-                    /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
-                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-                    tmp10 = tmp3;
-                    tmp13 = -tmp3;
-                    tmp11 = tmp2;
-                    tmp12 = -tmp2;
-                }
-            } else {
-                if (d0) {
-                    /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
-                    tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
-                    tmp10 = tmp13 = tmp11 = tmp12 = 0;
-                }
-            }
-        }
+                }
     }
 
   /* Odd part per figure 8; the matrix is unitary and hence its
    * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
    */
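The hunk above removes the run-time zero tests on d0 and d4 and keeps only the d6/d2 branching, so the general path with tmp0 = (d0 + d4) << CONST_BITS and tmp1 = (d0 - d4) << CONST_BITS is always taken (the case comments on the surviving branches still name the old d0/d4 conditions; only the tests are gone). The z1 form that survives is the standard three-multiply rotation. Writing c = sqrt(2)*cos(6*pi/16) and s = sqrt(2)*sin(6*pi/16), so that c + s = 1.847759065 and s - c = 0.765366865 match the FIX_* constants:

    \begin{aligned}
    z_1          &= (d_2 + d_6)\,c \\
    \mathrm{tmp2} &= z_1 - d_6\,(c + s) = c\,d_2 - s\,d_6 \\
    \mathrm{tmp3} &= z_1 + d_2\,(s - c) = s\,d_2 + c\,d_6
    \end{aligned}

Three multiplies instead of four; the d2 == 0 and d6 == 0 branches are the same rotation with one input forced to zero. The second hunk below makes the identical change in the other pass, which walks a column (note the DCTSIZE*7 stride in dataptr).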
@@ -709,184 +579,54 @@
     d7 = dataptr[DCTSIZE*7];
 
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
     if (d6) {
-        if (d4) {
             if (d2) {
-                if (d0) {
                     /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
                     z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
 
                     tmp0 = (d0 + d4) << CONST_BITS;
                     tmp1 = (d0 - d4) << CONST_BITS;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
                     tmp11 = tmp1 + tmp2;
                     tmp12 = tmp1 - tmp2;
                 } else {
-                    /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
-                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-                    tmp0 = d4 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp2 - tmp0;
-                    tmp12 = -(tmp0 + tmp2);
-                }
-            } else {
-                if (d0) {
                     /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
 
                     tmp0 = (d0 + d4) << CONST_BITS;
                     tmp1 = (d0 - d4) << CONST_BITS;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
                     tmp11 = tmp1 + tmp2;
                     tmp12 = tmp1 - tmp2;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
-                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-                    tmp0 = d4 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp2 - tmp0;
-                    tmp12 = -(tmp0 + tmp2);
-                }
-            }
-        } else {
+                }
+    } else {
             if (d2) {
-                if (d0) {
-                    /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
-                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-                    tmp0 = d0 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp0 + tmp2;
-                    tmp12 = tmp0 - tmp2;
-                } else {
-                    /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
-                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-                    tmp10 = tmp3;
-                    tmp13 = -tmp3;
-                    tmp11 = tmp2;
-                    tmp12 = -tmp2;
-                }
-            } else {
-                if (d0) {
-                    /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
-                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-                    tmp0 = d0 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp0 + tmp2;
-                    tmp12 = tmp0 - tmp2;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
-                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-                    tmp10 = tmp3;
-                    tmp13 = -tmp3;
-                    tmp11 = tmp2;
-                    tmp12 = -tmp2;
-                }
-            }
-        }
-    } else {
-        if (d4) {
-            if (d2) {
-                if (d0) {
                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
 
                     tmp0 = (d0 + d4) << CONST_BITS;
                     tmp1 = (d0 - d4) << CONST_BITS;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
                     tmp11 = tmp1 + tmp2;
                     tmp12 = tmp1 - tmp2;
                 } else {
-                    /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
-                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-                    tmp0 = d4 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp2 - tmp0;
-                    tmp12 = -(tmp0 + tmp2);
-                }
-            } else {
-                if (d0) {
                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
-                    tmp10 = tmp13 = d4 << CONST_BITS;
-                    tmp11 = tmp12 = -tmp10;
-                }
-            }
-        } else {
-            if (d2) {
-                if (d0) {
-                    /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
-                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-                    tmp0 = d0 << CONST_BITS;
-
-                    tmp10 = tmp0 + tmp3;
-                    tmp13 = tmp0 - tmp3;
-                    tmp11 = tmp0 + tmp2;
-                    tmp12 = tmp0 - tmp2;
-                } else {
-                    /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
-                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-                    tmp10 = tmp3;
-                    tmp13 = -tmp3;
-                    tmp11 = tmp2;
-                    tmp12 = -tmp2;
-                }
-            } else {
-                if (d0) {
-                    /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
-                    tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
-                } else {
-                    /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
-                    tmp10 = tmp13 = tmp11 = tmp12 = 0;
-                }
-            }
-        }
+                }
     }
 
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
      */
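The deleted branches were special cases of the surviving arithmetic, not different results: with d0 == 0 the general shifts give tmp0 = d4 << CONST_BITS and tmp1 = -tmp0, which turns tmp11 = tmp1 + tmp2 into tmp2 - tmp0 and tmp12 = tmp1 - tmp2 into -(tmp0 + tmp2), exactly what the removed code computed. A minimal standalone check of that equivalence (hypothetical harness, not part of the changeset; the shifts of negative values mirror the original code and are implementation-defined in strict C):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CONST_BITS 13

    /* General even-part combine kept by the commit. */
    static void even_general(int32_t d0, int32_t d4,
                             int32_t tmp2, int32_t tmp3, int32_t out[4])
    {
        int32_t tmp0 = (d0 + d4) << CONST_BITS;
        int32_t tmp1 = (d0 - d4) << CONST_BITS;

        out[0] = tmp0 + tmp3;   /* tmp10 */
        out[1] = tmp0 - tmp3;   /* tmp13 */
        out[2] = tmp1 + tmp2;   /* tmp11 */
        out[3] = tmp1 - tmp2;   /* tmp12 */
    }

    /* One of the removed special cases: d0 == 0, d4 != 0. */
    static void even_d0_zero(int32_t d4,
                             int32_t tmp2, int32_t tmp3, int32_t out[4])
    {
        int32_t tmp0 = d4 << CONST_BITS;

        out[0] = tmp0 + tmp3;
        out[1] = tmp0 - tmp3;
        out[2] = tmp2 - tmp0;
        out[3] = -(tmp0 + tmp2);
    }

    int main(void)
    {
        for (int32_t d4 = -255; d4 <= 255; d4 += 15) {
            for (int32_t t = -9999; t <= 9999; t += 1111) {
                int32_t g[4], s[4];
                even_general(0, d4, t, -t, g);  /* d0 == 0 on the general path */
                even_d0_zero(d4, t, -t, s);
                for (int i = 0; i < 4; i++)
                    assert(g[i] == s[i]);
            }
        }
        puts("general path reproduces the removed d0 == 0 case");
        return 0;
    }

The other removed cases substitute the same way, so the change trades a few always-executed shifts and adds for fewer data-dependent branches, which appears to be what the 4x4 code referenced in the commit message already did.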