Mercurial > libpostproc.hg
comparison postprocess_template.c @ 118:bdd1788fb53b libpostproc
Change semantic of CONFIG_*, HAVE_* and ARCH_*.
They are now always defined to either 0 or 1.
author | aurel |
---|---|
date | Tue, 13 Jan 2009 23:44:16 +0000 |
parents | bf8f52662dc3 |
children | 4a1602d552aa |
comparison
equal
deleted
inserted
replaced
117:3a76063f4145 | 118:bdd1788fb53b |
---|---|
29 | 29 |
30 #undef PAVGB | 30 #undef PAVGB |
31 #undef PMINUB | 31 #undef PMINUB |
32 #undef PMAXUB | 32 #undef PMAXUB |
33 | 33 |
34 #ifdef HAVE_MMX2 | 34 #if HAVE_MMX2 |
35 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" | 35 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
36 #elif defined (HAVE_3DNOW) | 36 #elif HAVE_3DNOW |
37 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" | 37 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
38 #endif | 38 #endif |
39 #define PAVGB(a,b) REAL_PAVGB(a,b) | 39 #define PAVGB(a,b) REAL_PAVGB(a,b) |
40 | 40 |
41 #ifdef HAVE_MMX2 | 41 #if HAVE_MMX2 |
42 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" | 42 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" |
43 #elif defined (HAVE_MMX) | 43 #elif HAVE_MMX |
44 #define PMINUB(b,a,t) \ | 44 #define PMINUB(b,a,t) \ |
45 "movq " #a ", " #t " \n\t"\ | 45 "movq " #a ", " #t " \n\t"\ |
46 "psubusb " #b ", " #t " \n\t"\ | 46 "psubusb " #b ", " #t " \n\t"\ |
47 "psubb " #t ", " #a " \n\t" | 47 "psubb " #t ", " #a " \n\t" |
48 #endif | 48 #endif |
49 | 49 |
50 #ifdef HAVE_MMX2 | 50 #if HAVE_MMX2 |
51 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" | 51 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" |
52 #elif defined (HAVE_MMX) | 52 #elif HAVE_MMX |
53 #define PMAXUB(a,b) \ | 53 #define PMAXUB(a,b) \ |
54 "psubusb " #a ", " #b " \n\t"\ | 54 "psubusb " #a ", " #b " \n\t"\ |
55 "paddb " #a ", " #b " \n\t" | 55 "paddb " #a ", " #b " \n\t" |
56 #endif | 56 #endif |
57 | 57 |
58 //FIXME? |255-0| = 1 (should not be a problem ...) | 58 //FIXME? |255-0| = 1 (should not be a problem ...) |
59 #ifdef HAVE_MMX | 59 #if HAVE_MMX |
60 /** | 60 /** |
61 * Check if the middle 8x8 Block in the given 8x16 block is flat | 61 * Check if the middle 8x8 Block in the given 8x16 block is flat |
62 */ | 62 */ |
63 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ | 63 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
64 int numEq= 0, dcOk; | 64 int numEq= 0, dcOk; |
134 "pcmpgtb %%mm6, %%mm2 \n\t" | 134 "pcmpgtb %%mm6, %%mm2 \n\t" |
135 "paddb %%mm2, %%mm0 \n\t" | 135 "paddb %%mm2, %%mm0 \n\t" |
136 "psubusb %%mm3, %%mm4 \n\t" | 136 "psubusb %%mm3, %%mm4 \n\t" |
137 | 137 |
138 " \n\t" | 138 " \n\t" |
139 #ifdef HAVE_MMX2 | 139 #if HAVE_MMX2 |
140 "pxor %%mm7, %%mm7 \n\t" | 140 "pxor %%mm7, %%mm7 \n\t" |
141 "psadbw %%mm7, %%mm0 \n\t" | 141 "psadbw %%mm7, %%mm0 \n\t" |
142 #else | 142 #else |
143 "movq %%mm0, %%mm1 \n\t" | 143 "movq %%mm0, %%mm1 \n\t" |
144 "psrlw $8, %%mm0 \n\t" | 144 "psrlw $8, %%mm0 \n\t" |
174 | 174 |
175 /** | 175 /** |
176 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) | 176 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) |
177 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 | 177 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 |
178 */ | 178 */ |
179 #ifndef HAVE_ALTIVEC | 179 #if !HAVE_ALTIVEC |
180 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) | 180 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
181 { | 181 { |
182 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 182 #if HAVE_MMX2 || HAVE_3DNOW |
183 src+= stride*3; | 183 src+= stride*3; |
184 __asm__ volatile( //"movv %0 %1 %2\n\t" | 184 __asm__ volatile( //"movv %0 %1 %2\n\t" |
185 "movq %2, %%mm0 \n\t" // QP,..., QP | 185 "movq %2, %%mm0 \n\t" // QP,..., QP |
186 "pxor %%mm4, %%mm4 \n\t" | 186 "pxor %%mm4, %%mm4 \n\t" |
187 | 187 |
304 | 304 |
305 : | 305 : |
306 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) | 306 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) |
307 : "%"REG_a, "%"REG_c | 307 : "%"REG_a, "%"REG_c |
308 ); | 308 ); |
309 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 309 #else //HAVE_MMX2 || HAVE_3DNOW |
310 const int l1= stride; | 310 const int l1= stride; |
311 const int l2= stride + l1; | 311 const int l2= stride + l1; |
312 const int l3= stride + l2; | 312 const int l3= stride + l2; |
313 const int l4= stride + l3; | 313 const int l4= stride + l3; |
314 const int l5= stride + l4; | 314 const int l5= stride + l4; |
343 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4; | 343 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4; |
344 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4; | 344 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4; |
345 | 345 |
346 src++; | 346 src++; |
347 } | 347 } |
348 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 348 #endif //HAVE_MMX2 || HAVE_3DNOW |
349 } | 349 } |
350 #endif //HAVE_ALTIVEC | 350 #endif //HAVE_ALTIVEC |
351 | 351 |
352 #if 0 | 352 #if 0 |
353 /** | 353 /** |
362 x/8 = 1 | 362 x/8 = 1 |
363 1 12 12 23 | 363 1 12 12 23 |
364 */ | 364 */ |
365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) | 365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) |
366 { | 366 { |
367 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 367 #if HAVE_MMX2 || HAVE_3DNOW |
368 src+= stride*3; | 368 src+= stride*3; |
369 // FIXME rounding | 369 // FIXME rounding |
370 __asm__ volatile( | 370 __asm__ volatile( |
371 "pxor %%mm7, %%mm7 \n\t" // 0 | 371 "pxor %%mm7, %%mm7 \n\t" // 0 |
372 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE | 372 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE |
424 | 424 |
425 : | 425 : |
426 : "r" (src), "r" ((x86_reg)stride) | 426 : "r" (src), "r" ((x86_reg)stride) |
427 : "%"REG_a, "%"REG_c | 427 : "%"REG_a, "%"REG_c |
428 ); | 428 ); |
429 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 429 #else //HAVE_MMX2 || HAVE_3DNOW |
430 const int l1= stride; | 430 const int l1= stride; |
431 const int l2= stride + l1; | 431 const int l2= stride + l1; |
432 const int l3= stride + l2; | 432 const int l3= stride + l2; |
433 const int l4= stride + l3; | 433 const int l4= stride + l3; |
434 const int l5= stride + l4; | 434 const int l5= stride + l4; |
447 src[x+l5] -=v>>1; | 447 src[x+l5] -=v>>1; |
448 src[x+l6] -=v>>3; | 448 src[x+l6] -=v>>3; |
449 } | 449 } |
450 } | 450 } |
451 | 451 |
452 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 452 #endif //HAVE_MMX2 || HAVE_3DNOW |
453 } | 453 } |
454 #endif //0 | 454 #endif //0 |
455 | 455 |
456 /** | 456 /** |
457 * Experimental Filter 1 | 457 * Experimental Filter 1 |
460 * can only smooth blocks at the expected locations (it cannot smooth them if they did move) | 460 * can only smooth blocks at the expected locations (it cannot smooth them if they did move) |
461 * MMX2 version does correct clipping C version does not | 461 * MMX2 version does correct clipping C version does not |
462 */ | 462 */ |
463 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) | 463 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
464 { | 464 { |
465 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 465 #if HAVE_MMX2 || HAVE_3DNOW |
466 src+= stride*3; | 466 src+= stride*3; |
467 | 467 |
468 __asm__ volatile( | 468 __asm__ volatile( |
469 "pxor %%mm7, %%mm7 \n\t" // 0 | 469 "pxor %%mm7, %%mm7 \n\t" // 0 |
470 "lea (%0, %1), %%"REG_a" \n\t" | 470 "lea (%0, %1), %%"REG_a" \n\t" |
546 | 546 |
547 : | 547 : |
548 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) | 548 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) |
549 : "%"REG_a, "%"REG_c | 549 : "%"REG_a, "%"REG_c |
550 ); | 550 ); |
551 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 551 #else //HAVE_MMX2 || HAVE_3DNOW |
552 | 552 |
553 const int l1= stride; | 553 const int l1= stride; |
554 const int l2= stride + l1; | 554 const int l2= stride + l1; |
555 const int l3= stride + l2; | 555 const int l3= stride + l2; |
556 const int l4= stride + l3; | 556 const int l4= stride + l3; |
580 src[l6] -=v>>2; | 580 src[l6] -=v>>2; |
581 src[l7] -=v>>3; | 581 src[l7] -=v>>3; |
582 } | 582 } |
583 src++; | 583 src++; |
584 } | 584 } |
585 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 585 #endif //HAVE_MMX2 || HAVE_3DNOW |
586 } | 586 } |
587 | 587 |
588 #ifndef HAVE_ALTIVEC | 588 #if !HAVE_ALTIVEC |
589 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) | 589 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) |
590 { | 590 { |
591 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 591 #if HAVE_MMX2 || HAVE_3DNOW |
592 /* | 592 /* |
593 uint8_t tmp[16]; | 593 uint8_t tmp[16]; |
594 const int l1= stride; | 594 const int l1= stride; |
595 const int l2= stride + l1; | 595 const int l2= stride + l1; |
596 const int l3= stride + l2; | 596 const int l3= stride + l2; |
867 } | 867 } |
868 } | 868 } |
869 } | 869 } |
870 } | 870 } |
871 */ | 871 */ |
872 #elif defined (HAVE_MMX) | 872 #elif HAVE_MMX |
873 src+= stride*4; | 873 src+= stride*4; |
874 __asm__ volatile( | 874 __asm__ volatile( |
875 "pxor %%mm7, %%mm7 \n\t" | 875 "pxor %%mm7, %%mm7 \n\t" |
876 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars | 876 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars |
877 "and "ALIGN_MASK", %%"REG_c" \n\t" // align | 877 "and "ALIGN_MASK", %%"REG_c" \n\t" // align |
976 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 | 976 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 |
977 | 977 |
978 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 | 978 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 |
979 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 | 979 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 |
980 | 980 |
981 #ifdef HAVE_MMX2 | 981 #if HAVE_MMX2 |
982 "movq %%mm7, %%mm6 \n\t" // 0 | 982 "movq %%mm7, %%mm6 \n\t" // 0 |
983 "psubw %%mm0, %%mm6 \n\t" | 983 "psubw %%mm0, %%mm6 \n\t" |
984 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | 984 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| |
985 "movq %%mm7, %%mm6 \n\t" // 0 | 985 "movq %%mm7, %%mm6 \n\t" // 0 |
986 "psubw %%mm1, %%mm6 \n\t" | 986 "psubw %%mm1, %%mm6 \n\t" |
1008 "pcmpgtw %%mm3, %%mm6 \n\t" | 1008 "pcmpgtw %%mm3, %%mm6 \n\t" |
1009 "pxor %%mm6, %%mm3 \n\t" | 1009 "pxor %%mm6, %%mm3 \n\t" |
1010 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | 1010 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| |
1011 #endif | 1011 #endif |
1012 | 1012 |
1013 #ifdef HAVE_MMX2 | 1013 #if HAVE_MMX2 |
1014 "pminsw %%mm2, %%mm0 \n\t" | 1014 "pminsw %%mm2, %%mm0 \n\t" |
1015 "pminsw %%mm3, %%mm1 \n\t" | 1015 "pminsw %%mm3, %%mm1 \n\t" |
1016 #else | 1016 #else |
1017 "movq %%mm0, %%mm6 \n\t" | 1017 "movq %%mm0, %%mm6 \n\t" |
1018 "psubusw %%mm2, %%mm6 \n\t" | 1018 "psubusw %%mm2, %%mm6 \n\t" |
1072 "pxor %%mm6, %%mm2 \n\t" | 1072 "pxor %%mm6, %%mm2 \n\t" |
1073 "pxor %%mm7, %%mm3 \n\t" | 1073 "pxor %%mm7, %%mm3 \n\t" |
1074 "pand %%mm2, %%mm4 \n\t" | 1074 "pand %%mm2, %%mm4 \n\t" |
1075 "pand %%mm3, %%mm5 \n\t" | 1075 "pand %%mm3, %%mm5 \n\t" |
1076 | 1076 |
1077 #ifdef HAVE_MMX2 | 1077 #if HAVE_MMX2 |
1078 "pminsw %%mm0, %%mm4 \n\t" | 1078 "pminsw %%mm0, %%mm4 \n\t" |
1079 "pminsw %%mm1, %%mm5 \n\t" | 1079 "pminsw %%mm1, %%mm5 \n\t" |
1080 #else | 1080 #else |
1081 "movq %%mm4, %%mm2 \n\t" | 1081 "movq %%mm4, %%mm2 \n\t" |
1082 "psubusw %%mm0, %%mm2 \n\t" | 1082 "psubusw %%mm0, %%mm2 \n\t" |
1099 | 1099 |
1100 : "+r" (src) | 1100 : "+r" (src) |
1101 : "r" ((x86_reg)stride), "m" (c->pQPb) | 1101 : "r" ((x86_reg)stride), "m" (c->pQPb) |
1102 : "%"REG_a, "%"REG_c | 1102 : "%"REG_a, "%"REG_c |
1103 ); | 1103 ); |
1104 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1104 #else //HAVE_MMX2 || HAVE_3DNOW |
1105 const int l1= stride; | 1105 const int l1= stride; |
1106 const int l2= stride + l1; | 1106 const int l2= stride + l1; |
1107 const int l3= stride + l2; | 1107 const int l3= stride + l2; |
1108 const int l4= stride + l3; | 1108 const int l4= stride + l3; |
1109 const int l5= stride + l4; | 1109 const int l5= stride + l4; |
1137 src[l4]-= d; | 1137 src[l4]-= d; |
1138 src[l5]+= d; | 1138 src[l5]+= d; |
1139 } | 1139 } |
1140 src++; | 1140 src++; |
1141 } | 1141 } |
1142 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1142 #endif //HAVE_MMX2 || HAVE_3DNOW |
1143 } | 1143 } |
1144 #endif //HAVE_ALTIVEC | 1144 #endif //HAVE_ALTIVEC |
1145 | 1145 |
1146 #ifndef HAVE_ALTIVEC | 1146 #if !HAVE_ALTIVEC |
1147 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) | 1147 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) |
1148 { | 1148 { |
1149 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1149 #if HAVE_MMX2 || HAVE_3DNOW |
1150 __asm__ volatile( | 1150 __asm__ volatile( |
1151 "pxor %%mm6, %%mm6 \n\t" | 1151 "pxor %%mm6, %%mm6 \n\t" |
1152 "pcmpeqb %%mm7, %%mm7 \n\t" | 1152 "pcmpeqb %%mm7, %%mm7 \n\t" |
1153 "movq %2, %%mm0 \n\t" | 1153 "movq %2, %%mm0 \n\t" |
1154 "punpcklbw %%mm6, %%mm0 \n\t" | 1154 "punpcklbw %%mm6, %%mm0 \n\t" |
1162 | 1162 |
1163 // 0 1 2 3 4 5 6 7 8 9 | 1163 // 0 1 2 3 4 5 6 7 8 9 |
1164 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 | 1164 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 |
1165 | 1165 |
1166 #undef FIND_MIN_MAX | 1166 #undef FIND_MIN_MAX |
1167 #ifdef HAVE_MMX2 | 1167 #if HAVE_MMX2 |
1168 #define REAL_FIND_MIN_MAX(addr)\ | 1168 #define REAL_FIND_MIN_MAX(addr)\ |
1169 "movq " #addr ", %%mm0 \n\t"\ | 1169 "movq " #addr ", %%mm0 \n\t"\ |
1170 "pminub %%mm0, %%mm7 \n\t"\ | 1170 "pminub %%mm0, %%mm7 \n\t"\ |
1171 "pmaxub %%mm0, %%mm6 \n\t" | 1171 "pmaxub %%mm0, %%mm6 \n\t" |
1172 #else | 1172 #else |
1189 FIND_MIN_MAX((%%REGd, %1, 2)) | 1189 FIND_MIN_MAX((%%REGd, %1, 2)) |
1190 FIND_MIN_MAX((%0, %1, 8)) | 1190 FIND_MIN_MAX((%0, %1, 8)) |
1191 | 1191 |
1192 "movq %%mm7, %%mm4 \n\t" | 1192 "movq %%mm7, %%mm4 \n\t" |
1193 "psrlq $8, %%mm7 \n\t" | 1193 "psrlq $8, %%mm7 \n\t" |
1194 #ifdef HAVE_MMX2 | 1194 #if HAVE_MMX2 |
1195 "pminub %%mm4, %%mm7 \n\t" // min of pixels | 1195 "pminub %%mm4, %%mm7 \n\t" // min of pixels |
1196 "pshufw $0xF9, %%mm7, %%mm4 \n\t" | 1196 "pshufw $0xF9, %%mm7, %%mm4 \n\t" |
1197 "pminub %%mm4, %%mm7 \n\t" // min of pixels | 1197 "pminub %%mm4, %%mm7 \n\t" // min of pixels |
1198 "pshufw $0xFE, %%mm7, %%mm4 \n\t" | 1198 "pshufw $0xFE, %%mm7, %%mm4 \n\t" |
1199 "pminub %%mm4, %%mm7 \n\t" | 1199 "pminub %%mm4, %%mm7 \n\t" |
1214 #endif | 1214 #endif |
1215 | 1215 |
1216 | 1216 |
1217 "movq %%mm6, %%mm4 \n\t" | 1217 "movq %%mm6, %%mm4 \n\t" |
1218 "psrlq $8, %%mm6 \n\t" | 1218 "psrlq $8, %%mm6 \n\t" |
1219 #ifdef HAVE_MMX2 | 1219 #if HAVE_MMX2 |
1220 "pmaxub %%mm4, %%mm6 \n\t" // max of pixels | 1220 "pmaxub %%mm4, %%mm6 \n\t" // max of pixels |
1221 "pshufw $0xF9, %%mm6, %%mm4 \n\t" | 1221 "pshufw $0xF9, %%mm6, %%mm4 \n\t" |
1222 "pmaxub %%mm4, %%mm6 \n\t" | 1222 "pmaxub %%mm4, %%mm6 \n\t" |
1223 "pshufw $0xFE, %%mm6, %%mm4 \n\t" | 1223 "pshufw $0xFE, %%mm6, %%mm4 \n\t" |
1224 "pmaxub %%mm4, %%mm6 \n\t" | 1224 "pmaxub %%mm4, %%mm6 \n\t" |
1368 | 1368 |
1369 "1: \n\t" | 1369 "1: \n\t" |
1370 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2) | 1370 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2) |
1371 : "%"REG_a, "%"REG_d, "%"REG_c | 1371 : "%"REG_a, "%"REG_d, "%"REG_c |
1372 ); | 1372 ); |
1373 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1373 #else //HAVE_MMX2 || HAVE_3DNOW |
1374 int y; | 1374 int y; |
1375 int min=255; | 1375 int min=255; |
1376 int max=0; | 1376 int max=0; |
1377 int avg; | 1377 int avg; |
1378 uint8_t *p; | 1378 uint8_t *p; |
1485 } | 1485 } |
1486 } | 1486 } |
1487 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; | 1487 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; |
1488 } | 1488 } |
1489 #endif | 1489 #endif |
1490 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1490 #endif //HAVE_MMX2 || HAVE_3DNOW |
1491 } | 1491 } |
1492 #endif //HAVE_ALTIVEC | 1492 #endif //HAVE_ALTIVEC |
1493 | 1493 |
1494 /** | 1494 /** |
1495 * Deinterlaces the given block by linearly interpolating every second line. | 1495 * Deinterlaces the given block by linearly interpolating every second line. |
1497 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. | 1497 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. |
1498 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1498 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
1499 */ | 1499 */ |
1500 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) | 1500 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) |
1501 { | 1501 { |
1502 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1502 #if HAVE_MMX2 || HAVE_3DNOW |
1503 src+= 4*stride; | 1503 src+= 4*stride; |
1504 __asm__ volatile( | 1504 __asm__ volatile( |
1505 "lea (%0, %1), %%"REG_a" \n\t" | 1505 "lea (%0, %1), %%"REG_a" \n\t" |
1506 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" | 1506 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" |
1507 // 0 1 2 3 4 5 6 7 8 9 | 1507 // 0 1 2 3 4 5 6 7 8 9 |
1550 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1550 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
1551 * this filter will read lines 3-15 and write 7-13 | 1551 * this filter will read lines 3-15 and write 7-13 |
1552 */ | 1552 */ |
1553 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) | 1553 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) |
1554 { | 1554 { |
1555 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1555 #if HAVE_MMX2 || HAVE_3DNOW |
1556 src+= stride*3; | 1556 src+= stride*3; |
1557 __asm__ volatile( | 1557 __asm__ volatile( |
1558 "lea (%0, %1), %%"REG_a" \n\t" | 1558 "lea (%0, %1), %%"REG_a" \n\t" |
1559 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1559 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
1560 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" | 1560 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" |
1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) | 1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) |
1593 | 1593 |
1594 : : "r" (src), "r" ((x86_reg)stride) | 1594 : : "r" (src), "r" ((x86_reg)stride) |
1595 : "%"REG_a, "%"REG_d, "%"REG_c | 1595 : "%"REG_a, "%"REG_d, "%"REG_c |
1596 ); | 1596 ); |
1597 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1597 #else //HAVE_MMX2 || HAVE_3DNOW |
1598 int x; | 1598 int x; |
1599 src+= stride*3; | 1599 src+= stride*3; |
1600 for(x=0; x<8; x++){ | 1600 for(x=0; x<8; x++){ |
1601 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); | 1601 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); |
1602 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); | 1602 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); |
1603 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); | 1603 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); |
1604 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); | 1604 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); |
1605 src++; | 1605 src++; |
1606 } | 1606 } |
1607 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1607 #endif //HAVE_MMX2 || HAVE_3DNOW |
1608 } | 1608 } |
1609 | 1609 |
1610 /** | 1610 /** |
1611 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. | 1611 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. |
1612 * will be called for every 8x8 block and can read & write from line 4-15 | 1612 * will be called for every 8x8 block and can read & write from line 4-15 |
1614 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1614 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
1615 * this filter will read lines 4-13 and write 5-11 | 1615 * this filter will read lines 4-13 and write 5-11 |
1616 */ | 1616 */ |
1617 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) | 1617 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) |
1618 { | 1618 { |
1619 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1619 #if HAVE_MMX2 || HAVE_3DNOW |
1620 src+= stride*4; | 1620 src+= stride*4; |
1621 __asm__ volatile( | 1621 __asm__ volatile( |
1622 "lea (%0, %1), %%"REG_a" \n\t" | 1622 "lea (%0, %1), %%"REG_a" \n\t" |
1623 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1623 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
1624 "pxor %%mm7, %%mm7 \n\t" | 1624 "pxor %%mm7, %%mm7 \n\t" |
1663 | 1663 |
1664 "movq %%mm0, (%2) \n\t" | 1664 "movq %%mm0, (%2) \n\t" |
1665 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) | 1665 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) |
1666 : "%"REG_a, "%"REG_d | 1666 : "%"REG_a, "%"REG_d |
1667 ); | 1667 ); |
1668 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1668 #else //HAVE_MMX2 || HAVE_3DNOW |
1669 int x; | 1669 int x; |
1670 src+= stride*4; | 1670 src+= stride*4; |
1671 for(x=0; x<8; x++){ | 1671 for(x=0; x<8; x++){ |
1672 int t1= tmp[x]; | 1672 int t1= tmp[x]; |
1673 int t2= src[stride*1]; | 1673 int t2= src[stride*1]; |
1681 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); | 1681 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); |
1682 tmp[x]= t1; | 1682 tmp[x]= t1; |
1683 | 1683 |
1684 src++; | 1684 src++; |
1685 } | 1685 } |
1686 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1686 #endif //HAVE_MMX2 || HAVE_3DNOW |
1687 } | 1687 } |
1688 | 1688 |
1689 /** | 1689 /** |
1690 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. | 1690 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. |
1691 * will be called for every 8x8 block and can read & write from line 4-15 | 1691 * will be called for every 8x8 block and can read & write from line 4-15 |
1693 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1693 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
1694 * this filter will read lines 4-13 and write 4-11 | 1694 * this filter will read lines 4-13 and write 4-11 |
1695 */ | 1695 */ |
1696 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) | 1696 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) |
1697 { | 1697 { |
1698 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1698 #if HAVE_MMX2 || HAVE_3DNOW |
1699 src+= stride*4; | 1699 src+= stride*4; |
1700 __asm__ volatile( | 1700 __asm__ volatile( |
1701 "lea (%0, %1), %%"REG_a" \n\t" | 1701 "lea (%0, %1), %%"REG_a" \n\t" |
1702 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1702 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
1703 "pxor %%mm7, %%mm7 \n\t" | 1703 "pxor %%mm7, %%mm7 \n\t" |
1753 "movq %%mm0, (%2) \n\t" | 1753 "movq %%mm0, (%2) \n\t" |
1754 "movq %%mm1, (%3) \n\t" | 1754 "movq %%mm1, (%3) \n\t" |
1755 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) | 1755 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) |
1756 : "%"REG_a, "%"REG_d | 1756 : "%"REG_a, "%"REG_d |
1757 ); | 1757 ); |
1758 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1758 #else //HAVE_MMX2 || HAVE_3DNOW |
1759 int x; | 1759 int x; |
1760 src+= stride*4; | 1760 src+= stride*4; |
1761 for(x=0; x<8; x++){ | 1761 for(x=0; x<8; x++){ |
1762 int t1= tmp[x]; | 1762 int t1= tmp[x]; |
1763 int t2= tmp2[x]; | 1763 int t2= tmp2[x]; |
1782 tmp[x]= t3; | 1782 tmp[x]= t3; |
1783 tmp2[x]= t1; | 1783 tmp2[x]= t1; |
1784 | 1784 |
1785 src++; | 1785 src++; |
1786 } | 1786 } |
1787 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1787 #endif //HAVE_MMX2 || HAVE_3DNOW |
1788 } | 1788 } |
1789 | 1789 |
1790 /** | 1790 /** |
1791 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. | 1791 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. |
1792 * will be called for every 8x8 block and can read & write from line 4-15 | 1792 * will be called for every 8x8 block and can read & write from line 4-15 |
1794 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1794 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
1795 * this filter will read lines 4-13 and write 4-11 | 1795 * this filter will read lines 4-13 and write 4-11 |
1796 */ | 1796 */ |
1797 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) | 1797 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) |
1798 { | 1798 { |
1799 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1799 #if HAVE_MMX2 || HAVE_3DNOW |
1800 src+= 4*stride; | 1800 src+= 4*stride; |
1801 __asm__ volatile( | 1801 __asm__ volatile( |
1802 "lea (%0, %1), %%"REG_a" \n\t" | 1802 "lea (%0, %1), %%"REG_a" \n\t" |
1803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
1804 // 0 1 2 3 4 5 6 7 8 9 | 1804 // 0 1 2 3 4 5 6 7 8 9 |
1841 "movq %%mm1, (%2) \n\t" | 1841 "movq %%mm1, (%2) \n\t" |
1842 | 1842 |
1843 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) | 1843 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) |
1844 : "%"REG_a, "%"REG_d | 1844 : "%"REG_a, "%"REG_d |
1845 ); | 1845 ); |
1846 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1846 #else //HAVE_MMX2 || HAVE_3DNOW |
1847 int a, b, c, x; | 1847 int a, b, c, x; |
1848 src+= 4*stride; | 1848 src+= 4*stride; |
1849 | 1849 |
1850 for(x=0; x<2; x++){ | 1850 for(x=0; x<2; x++){ |
1851 a= *(uint32_t*)&tmp[stride*0]; | 1851 a= *(uint32_t*)&tmp[stride*0]; |
1884 | 1884 |
1885 *(uint32_t*)&tmp[stride*0]= c; | 1885 *(uint32_t*)&tmp[stride*0]= c; |
1886 src += 4; | 1886 src += 4; |
1887 tmp += 4; | 1887 tmp += 4; |
1888 } | 1888 } |
1889 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1889 #endif //HAVE_MMX2 || HAVE_3DNOW |
1890 } | 1890 } |
1891 | 1891 |
1892 /** | 1892 /** |
1893 * Deinterlaces the given block by applying a median filter to every second line. | 1893 * Deinterlaces the given block by applying a median filter to every second line. |
1894 * will be called for every 8x8 block and can read & write from line 4-15, | 1894 * will be called for every 8x8 block and can read & write from line 4-15, |
1895 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. | 1895 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. |
1896 * lines 4-12 will be read into the deblocking filter and should be deinterlaced | 1896 * lines 4-12 will be read into the deblocking filter and should be deinterlaced |
1897 */ | 1897 */ |
1898 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) | 1898 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) |
1899 { | 1899 { |
1900 #ifdef HAVE_MMX | 1900 #if HAVE_MMX |
1901 src+= 4*stride; | 1901 src+= 4*stride; |
1902 #ifdef HAVE_MMX2 | 1902 #if HAVE_MMX2 |
1903 __asm__ volatile( | 1903 __asm__ volatile( |
1904 "lea (%0, %1), %%"REG_a" \n\t" | 1904 "lea (%0, %1), %%"REG_a" \n\t" |
1905 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" | 1905 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" |
1906 // 0 1 2 3 4 5 6 7 8 9 | 1906 // 0 1 2 3 4 5 6 7 8 9 |
1907 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 | 1907 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 |
2010 src++; | 2010 src++; |
2011 } | 2011 } |
2012 #endif //HAVE_MMX | 2012 #endif //HAVE_MMX |
2013 } | 2013 } |
2014 | 2014 |
2015 #ifdef HAVE_MMX | 2015 #if HAVE_MMX |
2016 /** | 2016 /** |
2017 * transposes and shift the given 8x8 Block into dst1 and dst2 | 2017 * transposes and shift the given 8x8 Block into dst1 and dst2 |
2018 */ | 2018 */ |
2019 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) | 2019 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) |
2020 { | 2020 { |
2178 ); | 2178 ); |
2179 } | 2179 } |
2180 #endif //HAVE_MMX | 2180 #endif //HAVE_MMX |
2181 //static long test=0; | 2181 //static long test=0; |
2182 | 2182 |
2183 #ifndef HAVE_ALTIVEC | 2183 #if !HAVE_ALTIVEC |
2184 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, | 2184 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, |
2185 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) | 2185 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) |
2186 { | 2186 { |
2187 // to save a register (FIXME do this outside of the loops) | 2187 // to save a register (FIXME do this outside of the loops) |
2188 tempBlurredPast[127]= maxNoise[0]; | 2188 tempBlurredPast[127]= maxNoise[0]; |
2189 tempBlurredPast[128]= maxNoise[1]; | 2189 tempBlurredPast[128]= maxNoise[1]; |
2190 tempBlurredPast[129]= maxNoise[2]; | 2190 tempBlurredPast[129]= maxNoise[2]; |
2191 | 2191 |
2192 #define FAST_L2_DIFF | 2192 #define FAST_L2_DIFF |
2193 //#define L1_DIFF //u should change the thresholds too if u try that one | 2193 //#define L1_DIFF //u should change the thresholds too if u try that one |
2194 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2194 #if HAVE_MMX2 || HAVE_3DNOW |
2195 __asm__ volatile( | 2195 __asm__ volatile( |
2196 "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride | 2196 "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride |
2197 "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride | 2197 "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride |
2198 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride | 2198 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride |
2199 // 0 1 2 3 4 5 6 7 8 9 | 2199 // 0 1 2 3 4 5 6 7 8 9 |
2477 "4: \n\t" | 2477 "4: \n\t" |
2478 | 2478 |
2479 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) | 2479 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) |
2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" | 2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" |
2481 ); | 2481 ); |
2482 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2482 #else //HAVE_MMX2 || HAVE_3DNOW |
2483 { | 2483 { |
2484 int y; | 2484 int y; |
2485 int d=0; | 2485 int d=0; |
2486 // int sysd=0; | 2486 // int sysd=0; |
2487 int i; | 2487 int i; |
2560 } | 2560 } |
2561 } | 2561 } |
2562 } | 2562 } |
2563 } | 2563 } |
2564 } | 2564 } |
2565 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 2565 #endif //HAVE_MMX2 || HAVE_3DNOW |
2566 } | 2566 } |
2567 #endif //HAVE_ALTIVEC | 2567 #endif //HAVE_ALTIVEC |
2568 | 2568 |
2569 #ifdef HAVE_MMX | 2569 #if HAVE_MMX |
2570 /** | 2570 /** |
2571 * accurate deblock filter | 2571 * accurate deblock filter |
2572 */ | 2572 */ |
2573 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){ | 2573 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){ |
2574 int64_t dc_mask, eq_mask, both_masks; | 2574 int64_t dc_mask, eq_mask, both_masks; |
2968 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 | 2968 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 |
2969 | 2969 |
2970 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 | 2970 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 |
2971 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 | 2971 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 |
2972 | 2972 |
2973 #ifdef HAVE_MMX2 | 2973 #if HAVE_MMX2 |
2974 "movq %%mm7, %%mm6 \n\t" // 0 | 2974 "movq %%mm7, %%mm6 \n\t" // 0 |
2975 "psubw %%mm0, %%mm6 \n\t" | 2975 "psubw %%mm0, %%mm6 \n\t" |
2976 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | 2976 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| |
2977 "movq %%mm7, %%mm6 \n\t" // 0 | 2977 "movq %%mm7, %%mm6 \n\t" // 0 |
2978 "psubw %%mm1, %%mm6 \n\t" | 2978 "psubw %%mm1, %%mm6 \n\t" |
3000 "pcmpgtw %%mm3, %%mm6 \n\t" | 3000 "pcmpgtw %%mm3, %%mm6 \n\t" |
3001 "pxor %%mm6, %%mm3 \n\t" | 3001 "pxor %%mm6, %%mm3 \n\t" |
3002 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | 3002 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| |
3003 #endif | 3003 #endif |
3004 | 3004 |
3005 #ifdef HAVE_MMX2 | 3005 #if HAVE_MMX2 |
3006 "pminsw %%mm2, %%mm0 \n\t" | 3006 "pminsw %%mm2, %%mm0 \n\t" |
3007 "pminsw %%mm3, %%mm1 \n\t" | 3007 "pminsw %%mm3, %%mm1 \n\t" |
3008 #else | 3008 #else |
3009 "movq %%mm0, %%mm6 \n\t" | 3009 "movq %%mm0, %%mm6 \n\t" |
3010 "psubusw %%mm2, %%mm6 \n\t" | 3010 "psubusw %%mm2, %%mm6 \n\t" |
3064 "pxor %%mm6, %%mm2 \n\t" | 3064 "pxor %%mm6, %%mm2 \n\t" |
3065 "pxor %%mm7, %%mm3 \n\t" | 3065 "pxor %%mm7, %%mm3 \n\t" |
3066 "pand %%mm2, %%mm4 \n\t" | 3066 "pand %%mm2, %%mm4 \n\t" |
3067 "pand %%mm3, %%mm5 \n\t" | 3067 "pand %%mm3, %%mm5 \n\t" |
3068 | 3068 |
3069 #ifdef HAVE_MMX2 | 3069 #if HAVE_MMX2 |
3070 "pminsw %%mm0, %%mm4 \n\t" | 3070 "pminsw %%mm0, %%mm4 \n\t" |
3071 "pminsw %%mm1, %%mm5 \n\t" | 3071 "pminsw %%mm1, %%mm5 \n\t" |
3072 #else | 3072 #else |
3073 "movq %%mm4, %%mm2 \n\t" | 3073 "movq %%mm4, %%mm2 \n\t" |
3074 "psubusw %%mm0, %%mm2 \n\t" | 3074 "psubusw %%mm0, %%mm2 \n\t" |
3114 #undef SCALED_CPY | 3114 #undef SCALED_CPY |
3115 | 3115 |
3116 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, | 3116 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, |
3117 int levelFix, int64_t *packedOffsetAndScale) | 3117 int levelFix, int64_t *packedOffsetAndScale) |
3118 { | 3118 { |
3119 #ifndef HAVE_MMX | 3119 #if !HAVE_MMX |
3120 int i; | 3120 int i; |
3121 #endif | 3121 #endif |
3122 if(levelFix){ | 3122 if(levelFix){ |
3123 #ifdef HAVE_MMX | 3123 #if HAVE_MMX |
3124 __asm__ volatile( | 3124 __asm__ volatile( |
3125 "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset | 3125 "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset |
3126 "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale | 3126 "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale |
3127 "lea (%2,%4), %%"REG_a" \n\t" | 3127 "lea (%2,%4), %%"REG_a" \n\t" |
3128 "lea (%3,%5), %%"REG_d" \n\t" | 3128 "lea (%3,%5), %%"REG_d" \n\t" |
3129 "pxor %%mm4, %%mm4 \n\t" | 3129 "pxor %%mm4, %%mm4 \n\t" |
3130 #ifdef HAVE_MMX2 | 3130 #if HAVE_MMX2 |
3131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ | 3131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ |
3132 "movq " #src1 ", %%mm0 \n\t"\ | 3132 "movq " #src1 ", %%mm0 \n\t"\ |
3133 "movq " #src1 ", %%mm5 \n\t"\ | 3133 "movq " #src1 ", %%mm5 \n\t"\ |
3134 "movq " #src2 ", %%mm1 \n\t"\ | 3134 "movq " #src2 ", %%mm1 \n\t"\ |
3135 "movq " #src2 ", %%mm6 \n\t"\ | 3135 "movq " #src2 ", %%mm6 \n\t"\ |
3201 for(i=0; i<8; i++) | 3201 for(i=0; i<8; i++) |
3202 memcpy( &(dst[dstStride*i]), | 3202 memcpy( &(dst[dstStride*i]), |
3203 &(src[srcStride*i]), BLOCK_SIZE); | 3203 &(src[srcStride*i]), BLOCK_SIZE); |
3204 #endif //HAVE_MMX | 3204 #endif //HAVE_MMX |
3205 }else{ | 3205 }else{ |
3206 #ifdef HAVE_MMX | 3206 #if HAVE_MMX |
3207 __asm__ volatile( | 3207 __asm__ volatile( |
3208 "lea (%0,%2), %%"REG_a" \n\t" | 3208 "lea (%0,%2), %%"REG_a" \n\t" |
3209 "lea (%1,%3), %%"REG_d" \n\t" | 3209 "lea (%1,%3), %%"REG_d" \n\t" |
3210 | 3210 |
3211 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ | 3211 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ |
3241 /** | 3241 /** |
3242 * Duplicates the given 8 src pixels ? times upward | 3242 * Duplicates the given 8 src pixels ? times upward |
3243 */ | 3243 */ |
3244 static inline void RENAME(duplicate)(uint8_t src[], int stride) | 3244 static inline void RENAME(duplicate)(uint8_t src[], int stride) |
3245 { | 3245 { |
3246 #ifdef HAVE_MMX | 3246 #if HAVE_MMX |
3247 __asm__ volatile( | 3247 __asm__ volatile( |
3248 "movq (%0), %%mm0 \n\t" | 3248 "movq (%0), %%mm0 \n\t" |
3249 "add %1, %0 \n\t" | 3249 "add %1, %0 \n\t" |
3250 "movq %%mm0, (%0) \n\t" | 3250 "movq %%mm0, (%0) \n\t" |
3251 "movq %%mm0, (%0, %1) \n\t" | 3251 "movq %%mm0, (%0, %1) \n\t" |
3278 #endif | 3278 #endif |
3279 int black=0, white=255; // blackest black and whitest white in the picture | 3279 int black=0, white=255; // blackest black and whitest white in the picture |
3280 int QPCorrecture= 256*256; | 3280 int QPCorrecture= 256*256; |
3281 | 3281 |
3282 int copyAhead; | 3282 int copyAhead; |
3283 #ifdef HAVE_MMX | 3283 #if HAVE_MMX |
3284 int i; | 3284 int i; |
3285 #endif | 3285 #endif |
3286 | 3286 |
3287 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4; | 3287 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4; |
3288 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4; | 3288 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4; |
3291 uint64_t * const yHistogram= c.yHistogram; | 3291 uint64_t * const yHistogram= c.yHistogram; |
3292 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride; | 3292 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride; |
3293 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; | 3293 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; |
3294 //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; | 3294 //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; |
3295 | 3295 |
3296 #ifdef HAVE_MMX | 3296 #if HAVE_MMX |
3297 for(i=0; i<57; i++){ | 3297 for(i=0; i<57; i++){ |
3298 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; | 3298 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; |
3299 int threshold= offset*2 + 1; | 3299 int threshold= offset*2 + 1; |
3300 c.mmxDcOffset[i]= 0x7F - offset; | 3300 c.mmxDcOffset[i]= 0x7F - offset; |
3301 c.mmxDcThreshold[i]= 0x7F - threshold; | 3301 c.mmxDcThreshold[i]= 0x7F - threshold; |
3349 clipped-= yHistogram[white]; | 3349 clipped-= yHistogram[white]; |
3350 } | 3350 } |
3351 | 3351 |
3352 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); | 3352 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); |
3353 | 3353 |
3354 #ifdef HAVE_MMX2 | 3354 #if HAVE_MMX2 |
3355 c.packedYScale= (uint16_t)(scale*256.0 + 0.5); | 3355 c.packedYScale= (uint16_t)(scale*256.0 + 0.5); |
3356 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; | 3356 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; |
3357 #else | 3357 #else |
3358 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5); | 3358 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5); |
3359 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF; | 3359 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF; |
3382 // From this point on it is guaranteed that we can read and write 16 lines downward | 3382 // From this point on it is guaranteed that we can read and write 16 lines downward |
3383 // finish 1 block before the next otherwise we might have a problem | 3383 // finish 1 block before the next otherwise we might have a problem |
3384 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing | 3384 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing |
3385 for(x=0; x<width; x+=BLOCK_SIZE){ | 3385 for(x=0; x<width; x+=BLOCK_SIZE){ |
3386 | 3386 |
3387 #ifdef HAVE_MMX2 | 3387 #if HAVE_MMX2 |
3388 /* | 3388 /* |
3389 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); | 3389 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); |
3390 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); | 3390 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
3391 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); | 3391 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); |
3392 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); | 3392 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); |
3409 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), | 3409 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), |
3410 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) | 3410 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) |
3411 : "%"REG_a, "%"REG_d | 3411 : "%"REG_a, "%"REG_d |
3412 ); | 3412 ); |
3413 | 3413 |
3414 #elif defined(HAVE_3DNOW) | 3414 #elif HAVE_3DNOW |
3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... | 3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... |
3416 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); | 3416 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); |
3417 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); | 3417 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); |
3418 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); | 3418 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); |
3419 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); | 3419 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); |
3455 | 3455 |
3456 for(y=0; y<height; y+=BLOCK_SIZE){ | 3456 for(y=0; y<height; y+=BLOCK_SIZE){ |
3457 //1% speedup if these are here instead of the inner loop | 3457 //1% speedup if these are here instead of the inner loop |
3458 const uint8_t *srcBlock= &(src[y*srcStride]); | 3458 const uint8_t *srcBlock= &(src[y*srcStride]); |
3459 uint8_t *dstBlock= &(dst[y*dstStride]); | 3459 uint8_t *dstBlock= &(dst[y*dstStride]); |
3460 #ifdef HAVE_MMX | 3460 #if HAVE_MMX |
3461 uint8_t *tempBlock1= c.tempBlocks; | 3461 uint8_t *tempBlock1= c.tempBlocks; |
3462 uint8_t *tempBlock2= c.tempBlocks + 8; | 3462 uint8_t *tempBlock2= c.tempBlocks + 8; |
3463 #endif | 3463 #endif |
3464 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride]; | 3464 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride]; |
3465 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)]; | 3465 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)]; |
3491 // From this point on it is guaranteed that we can read and write 16 lines downward | 3491 // From this point on it is guaranteed that we can read and write 16 lines downward |
3492 // finish 1 block before the next otherwise we might have a problem | 3492 // finish 1 block before the next otherwise we might have a problem |
3493 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing | 3493 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing |
3494 for(x=0; x<width; x+=BLOCK_SIZE){ | 3494 for(x=0; x<width; x+=BLOCK_SIZE){ |
3495 const int stride= dstStride; | 3495 const int stride= dstStride; |
3496 #ifdef HAVE_MMX | 3496 #if HAVE_MMX |
3497 uint8_t *tmpXchg; | 3497 uint8_t *tmpXchg; |
3498 #endif | 3498 #endif |
3499 if(isColor){ | 3499 if(isColor){ |
3500 QP= QPptr[x>>qpHShift]; | 3500 QP= QPptr[x>>qpHShift]; |
3501 c.nonBQP= nonBQPptr[x>>qpHShift]; | 3501 c.nonBQP= nonBQPptr[x>>qpHShift]; |
3505 c.nonBQP= nonBQPptr[x>>4]; | 3505 c.nonBQP= nonBQPptr[x>>4]; |
3506 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; | 3506 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; |
3507 yHistogram[ srcBlock[srcStride*12 + 4] ]++; | 3507 yHistogram[ srcBlock[srcStride*12 + 4] ]++; |
3508 } | 3508 } |
3509 c.QP= QP; | 3509 c.QP= QP; |
3510 #ifdef HAVE_MMX | 3510 #if HAVE_MMX |
3511 __asm__ volatile( | 3511 __asm__ volatile( |
3512 "movd %1, %%mm7 \n\t" | 3512 "movd %1, %%mm7 \n\t" |
3513 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP | 3513 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP |
3514 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP | 3514 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP |
3515 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP | 3515 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP |
3518 : "r" (QP) | 3518 : "r" (QP) |
3519 ); | 3519 ); |
3520 #endif | 3520 #endif |
3521 | 3521 |
3522 | 3522 |
3523 #ifdef HAVE_MMX2 | 3523 #if HAVE_MMX2 |
3524 /* | 3524 /* |
3525 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); | 3525 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); |
3526 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); | 3526 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); |
3527 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); | 3527 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); |
3528 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); | 3528 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); |
3545 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), | 3545 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), |
3546 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) | 3546 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) |
3547 : "%"REG_a, "%"REG_d | 3547 : "%"REG_a, "%"REG_d |
3548 ); | 3548 ); |
3549 | 3549 |
3550 #elif defined(HAVE_3DNOW) | 3550 #elif HAVE_3DNOW |
3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... | 3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... |
3552 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); | 3552 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); |
3553 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); | 3553 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); |
3554 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); | 3554 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); |
3555 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); | 3555 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); |
3589 }else if(mode & V_A_DEBLOCK){ | 3589 }else if(mode & V_A_DEBLOCK){ |
3590 RENAME(do_a_deblock)(dstBlock, stride, 1, &c); | 3590 RENAME(do_a_deblock)(dstBlock, stride, 1, &c); |
3591 } | 3591 } |
3592 } | 3592 } |
3593 | 3593 |
3594 #ifdef HAVE_MMX | 3594 #if HAVE_MMX |
3595 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); | 3595 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); |
3596 #endif | 3596 #endif |
3597 /* check if we have a previous block to deblock it with dstBlock */ | 3597 /* check if we have a previous block to deblock it with dstBlock */ |
3598 if(x - 8 >= 0){ | 3598 if(x - 8 >= 0){ |
3599 #ifdef HAVE_MMX | 3599 #if HAVE_MMX |
3600 if(mode & H_X1_FILTER) | 3600 if(mode & H_X1_FILTER) |
3601 RENAME(vertX1Filter)(tempBlock1, 16, &c); | 3601 RENAME(vertX1Filter)(tempBlock1, 16, &c); |
3602 else if(mode & H_DEBLOCK){ | 3602 else if(mode & H_DEBLOCK){ |
3603 //START_TIMER | 3603 //START_TIMER |
3604 const int t= RENAME(vertClassify)(tempBlock1, 16, &c); | 3604 const int t= RENAME(vertClassify)(tempBlock1, 16, &c); |
3615 | 3615 |
3616 #else | 3616 #else |
3617 if(mode & H_X1_FILTER) | 3617 if(mode & H_X1_FILTER) |
3618 horizX1Filter(dstBlock-4, stride, QP); | 3618 horizX1Filter(dstBlock-4, stride, QP); |
3619 else if(mode & H_DEBLOCK){ | 3619 else if(mode & H_DEBLOCK){ |
3620 #ifdef HAVE_ALTIVEC | 3620 #if HAVE_ALTIVEC |
3621 DECLARE_ALIGNED(16, unsigned char, tempBlock[272]); | 3621 DECLARE_ALIGNED(16, unsigned char, tempBlock[272]); |
3622 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); | 3622 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); |
3623 | 3623 |
3624 const int t=vertClassify_altivec(tempBlock-48, 16, &c); | 3624 const int t=vertClassify_altivec(tempBlock-48, 16, &c); |
3625 if(t==1) { | 3625 if(t==1) { |
3657 } | 3657 } |
3658 | 3658 |
3659 dstBlock+=8; | 3659 dstBlock+=8; |
3660 srcBlock+=8; | 3660 srcBlock+=8; |
3661 | 3661 |
3662 #ifdef HAVE_MMX | 3662 #if HAVE_MMX |
3663 tmpXchg= tempBlock1; | 3663 tmpXchg= tempBlock1; |
3664 tempBlock1= tempBlock2; | 3664 tempBlock1= tempBlock2; |
3665 tempBlock2 = tmpXchg; | 3665 tempBlock2 = tmpXchg; |
3666 #endif | 3666 #endif |
3667 } | 3667 } |
3697 + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride]; | 3697 + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride]; |
3698 + dstBlock[x +13*dstStride] | 3698 + dstBlock[x +13*dstStride] |
3699 + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; | 3699 + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; |
3700 }*/ | 3700 }*/ |
3701 } | 3701 } |
3702 #ifdef HAVE_3DNOW | 3702 #if HAVE_3DNOW |
3703 __asm__ volatile("femms"); | 3703 __asm__ volatile("femms"); |
3704 #elif defined (HAVE_MMX) | 3704 #elif HAVE_MMX |
3705 __asm__ volatile("emms"); | 3705 __asm__ volatile("emms"); |
3706 #endif | 3706 #endif |
3707 | 3707 |
3708 #ifdef DEBUG_BRIGHTNESS | 3708 #ifdef DEBUG_BRIGHTNESS |
3709 if(!isColor){ | 3709 if(!isColor){ |