comparison libswscale/rgb2rgb_template.c @ 22994:ac77d9ef8c83

Slightly faster rgb32tobgr32: avoid one add and one cmp per loop iteration.
author ivo
date Tue, 17 Apr 2007 20:38:17 +0000
parents 59671a52cc82
children 70d7c6206f33
comparison
equal deleted inserted replaced
22993:5e6fa9cabacc 22994:ac77d9ef8c83
1362 } 1362 }
1363 } 1363 }
1364 1364
1365 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) 1365 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
1366 { 1366 {
1367 uint8_t *d = dst, *s = (uint8_t *) src; 1367 long idx = 15 - src_size;
1368 const uint8_t *end = s + src_size; 1368 uint8_t *s = (uint8_t *) src-idx, *d = dst-idx;
1369 #ifdef HAVE_MMX 1369 #ifdef HAVE_MMX
1370 __asm __volatile( 1370 __asm __volatile(
1371 " "PREFETCH" (%1) \n" 1371 " test %0, %0 \n"
1372 " jns 2f \n"
1373 " "PREFETCH" (%1, %0) \n"
1372 " movq %3, %%mm7 \n" 1374 " movq %3, %%mm7 \n"
1373 " pxor %4, %%mm7 \n" 1375 " pxor %4, %%mm7 \n"
1374 " movq %%mm7, %%mm6 \n" 1376 " movq %%mm7, %%mm6 \n"
1375 " pxor %5, %%mm7 \n" 1377 " pxor %5, %%mm7 \n"
1376 " jmp 2f \n"
1377 ASMALIGN(4) 1378 ASMALIGN(4)
1378 "1: \n" 1379 "1: \n"
1379 " "PREFETCH" 32(%1) \n" 1380 " "PREFETCH" 32(%1, %0) \n"
1380 " movq (%1), %%mm0 \n" 1381 " movq (%1, %0), %%mm0 \n"
1381 " movq 8(%1), %%mm1 \n" 1382 " movq 8(%1, %0), %%mm1 \n"
1382 # ifdef HAVE_MMX2 1383 # ifdef HAVE_MMX2
1383 " pshufw $177, %%mm0, %%mm3 \n" 1384 " pshufw $177, %%mm0, %%mm3 \n"
1384 " pshufw $177, %%mm1, %%mm5 \n" 1385 " pshufw $177, %%mm1, %%mm5 \n"
1385 " pand %%mm7, %%mm0 \n" 1386 " pand %%mm7, %%mm0 \n"
1386 " pand %%mm6, %%mm3 \n" 1387 " pand %%mm6, %%mm3 \n"
1404 " por %%mm2, %%mm0 \n" 1405 " por %%mm2, %%mm0 \n"
1405 " por %%mm4, %%mm1 \n" 1406 " por %%mm4, %%mm1 \n"
1406 " por %%mm3, %%mm0 \n" 1407 " por %%mm3, %%mm0 \n"
1407 " por %%mm5, %%mm1 \n" 1408 " por %%mm5, %%mm1 \n"
1408 # endif 1409 # endif
1409 " "MOVNTQ" %%mm0, (%0) \n" 1410 " "MOVNTQ" %%mm0, (%2, %0) \n"
1410 " "MOVNTQ" %%mm1, 8(%0) \n" 1411 " "MOVNTQ" %%mm1, 8(%2, %0) \n"
1411 " add $16, %0 \n" 1412 " add $16, %0 \n"
1412 " add $16, %1 \n" 1413 " js 1b \n"
1413 "2: \n"
1414 " cmp %1, %2 \n"
1415 " ja 1b \n"
1416 " "SFENCE" \n" 1414 " "SFENCE" \n"
1417 " "EMMS" \n" 1415 " "EMMS" \n"
1418 : "+r"(d), "+r"(s) 1416 "2: \n"
1419 : "r" (end-15), "m" (mask32b), "m" (mask32r), "m" (mmx_one) 1417 : "+&r"(idx)
1418 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
1420 : "memory"); 1419 : "memory");
1421 #endif 1420 #endif
1422 for (; s<end; s+=4, d+=4) { 1421 for (; idx<15; idx+=4) {
1423 int v = *(uint32_t *)s, g = v & 0xff00; 1422 register int v = *(uint32_t *)&s[idx], g = v & 0xff00;
1424 v &= 0xff00ff; 1423 v &= 0xff00ff;
1425 *(uint32_t *)d = (v>>16) + g + (v<<16); 1424 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
1426 } 1425 }
1427 } 1426 }
1428 1427
1429 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) 1428 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
1430 { 1429 {