Mercurial > mplayer.hg
comparison libswscale/rgb2rgb_template.c @ 22994:ac77d9ef8c83
slightly faster rgb32tobgr32; avoid one add and one cmp
author | ivo |
---|---|
date | Tue, 17 Apr 2007 20:38:17 +0000 |
parents | 59671a52cc82 |
children | 70d7c6206f33 |
comparison (legend: equal, deleted, inserted, replaced)
22993:5e6fa9cabacc | 22994:ac77d9ef8c83 |
---|---|
1362 } | 1362 } |
1363 } | 1363 } |
1364 | 1364 |
1365 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) | 1365 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) |
1366 { | 1366 { |
1367 uint8_t *d = dst, *s = (uint8_t *) src; | 1367 long idx = 15 - src_size; |
1368 const uint8_t *end = s + src_size; | 1368 uint8_t *s = (uint8_t *) src-idx, *d = dst-idx; |
1369 #ifdef HAVE_MMX | 1369 #ifdef HAVE_MMX |
1370 __asm __volatile( | 1370 __asm __volatile( |
1371 " "PREFETCH" (%1) \n" | 1371 " test %0, %0 \n" |
1372 " jns 2f \n" | |
1373 " "PREFETCH" (%1, %0) \n" | |
1372 " movq %3, %%mm7 \n" | 1374 " movq %3, %%mm7 \n" |
1373 " pxor %4, %%mm7 \n" | 1375 " pxor %4, %%mm7 \n" |
1374 " movq %%mm7, %%mm6 \n" | 1376 " movq %%mm7, %%mm6 \n" |
1375 " pxor %5, %%mm7 \n" | 1377 " pxor %5, %%mm7 \n" |
1376 " jmp 2f \n" | |
1377 ASMALIGN(4) | 1378 ASMALIGN(4) |
1378 "1: \n" | 1379 "1: \n" |
1379 " "PREFETCH" 32(%1) \n" | 1380 " "PREFETCH" 32(%1, %0) \n" |
1380 " movq (%1), %%mm0 \n" | 1381 " movq (%1, %0), %%mm0 \n" |
1381 " movq 8(%1), %%mm1 \n" | 1382 " movq 8(%1, %0), %%mm1 \n" |
1382 # ifdef HAVE_MMX2 | 1383 # ifdef HAVE_MMX2 |
1383 " pshufw $177, %%mm0, %%mm3 \n" | 1384 " pshufw $177, %%mm0, %%mm3 \n" |
1384 " pshufw $177, %%mm1, %%mm5 \n" | 1385 " pshufw $177, %%mm1, %%mm5 \n" |
1385 " pand %%mm7, %%mm0 \n" | 1386 " pand %%mm7, %%mm0 \n" |
1386 " pand %%mm6, %%mm3 \n" | 1387 " pand %%mm6, %%mm3 \n" |
1404 " por %%mm2, %%mm0 \n" | 1405 " por %%mm2, %%mm0 \n" |
1405 " por %%mm4, %%mm1 \n" | 1406 " por %%mm4, %%mm1 \n" |
1406 " por %%mm3, %%mm0 \n" | 1407 " por %%mm3, %%mm0 \n" |
1407 " por %%mm5, %%mm1 \n" | 1408 " por %%mm5, %%mm1 \n" |
1408 # endif | 1409 # endif |
1409 " "MOVNTQ" %%mm0, (%0) \n" | 1410 " "MOVNTQ" %%mm0, (%2, %0) \n" |
1410 " "MOVNTQ" %%mm1, 8(%0) \n" | 1411 " "MOVNTQ" %%mm1, 8(%2, %0) \n" |
1411 " add $16, %0 \n" | 1412 " add $16, %0 \n" |
1412 " add $16, %1 \n" | 1413 " js 1b \n" |
1413 "2: \n" | |
1414 " cmp %1, %2 \n" | |
1415 " ja 1b \n" | |
1416 " "SFENCE" \n" | 1414 " "SFENCE" \n" |
1417 " "EMMS" \n" | 1415 " "EMMS" \n" |
1418 : "+r"(d), "+r"(s) | 1416 "2: \n" |
1419 : "r" (end-15), "m" (mask32b), "m" (mask32r), "m" (mmx_one) | 1417 : "+&r"(idx) |
1418 : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) | |
1420 : "memory"); | 1419 : "memory"); |
1421 #endif | 1420 #endif |
1422 for (; s<end; s+=4, d+=4) { | 1421 for (; idx<15; idx+=4) { |
1423 int v = *(uint32_t *)s, g = v & 0xff00; | 1422 register int v = *(uint32_t *)&s[idx], g = v & 0xff00; |
1424 v &= 0xff00ff; | 1423 v &= 0xff00ff; |
1425 *(uint32_t *)d = (v>>16) + g + (v<<16); | 1424 *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); |
1426 } | 1425 } |
1427 } | 1426 } |
1428 | 1427 |
1429 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) | 1428 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size) |
1430 { | 1429 { |