comparison jrevdct.c @ 2262:7a1c3178d759 libavcodec

optimizing 4x4 idct
author michael
date Sun, 26 Sep 2004 16:33:39 +0000
parents 12e75af1d44c
children 258f21820108
comparison
equal deleted inserted replaced
2261:bd09f4d1976f 2262:7a1c3178d759
1182 int32_t tmp10, tmp11, tmp12, tmp13; 1182 int32_t tmp10, tmp11, tmp12, tmp13;
1183 int32_t z1; 1183 int32_t z1;
1184 int32_t d0, d2, d4, d6; 1184 int32_t d0, d2, d4, d6;
1185 register DCTELEM *dataptr; 1185 register DCTELEM *dataptr;
1186 int rowctr; 1186 int rowctr;
1187 1187
1188 /* Pass 1: process rows. */ 1188 /* Pass 1: process rows. */
1189 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ 1189 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
1190 /* furthermore, we scale the results by 2**PASS1_BITS. */ 1190 /* furthermore, we scale the results by 2**PASS1_BITS. */
1191 1191
1192 data[0] += 4;
1193
1192 dataptr = data; 1194 dataptr = data;
1193 1195
1194 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { 1196 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
1195 /* Due to quantization, we will usually find that many of the input 1197 /* Due to quantization, we will usually find that many of the input
1196 * coefficients are zero, especially the AC terms. We can exploit this 1198 * coefficients are zero, especially the AC terms. We can exploit this
1220 } 1222 }
1221 1223
1222 dataptr += DCTSTRIDE; /* advance pointer to next row */ 1224 dataptr += DCTSTRIDE; /* advance pointer to next row */
1223 continue; 1225 continue;
1224 } 1226 }
1225 1227
1226 /* Even part: reverse the even part of the forward DCT. */ 1228 /* Even part: reverse the even part of the forward DCT. */
1227 /* The rotator is sqrt(2)*c(-6). */ 1229 /* The rotator is sqrt(2)*c(-6). */
1228 if (d6) { 1230 if (d6) {
1229 if (d4) {
1230 if (d2) { 1231 if (d2) {
1231 if (d0) {
1232 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 1232 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1233 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 1233 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1234 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 1234 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1235 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 1235 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1236 1236
1239 1239
1240 tmp10 = tmp0 + tmp3; 1240 tmp10 = tmp0 + tmp3;
1241 tmp13 = tmp0 - tmp3; 1241 tmp13 = tmp0 - tmp3;
1242 tmp11 = tmp1 + tmp2; 1242 tmp11 = tmp1 + tmp2;
1243 tmp12 = tmp1 - tmp2; 1243 tmp12 = tmp1 - tmp2;
1244 } else { 1244 } else {
1245 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
1246 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1247 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1248 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1249
1250 tmp0 = d4 << CONST_BITS;
1251
1252 tmp10 = tmp0 + tmp3;
1253 tmp13 = tmp0 - tmp3;
1254 tmp11 = tmp2 - tmp0;
1255 tmp12 = -(tmp0 + tmp2);
1256 }
1257 } else {
1258 if (d0) {
1259 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 1245 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
1260 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 1246 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1261 tmp3 = MULTIPLY(d6, FIX_0_541196100); 1247 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1262 1248
1263 tmp0 = (d0 + d4) << CONST_BITS; 1249 tmp0 = (d0 + d4) << CONST_BITS;
1265 1251
1266 tmp10 = tmp0 + tmp3; 1252 tmp10 = tmp0 + tmp3;
1267 tmp13 = tmp0 - tmp3; 1253 tmp13 = tmp0 - tmp3;
1268 tmp11 = tmp1 + tmp2; 1254 tmp11 = tmp1 + tmp2;
1269 tmp12 = tmp1 - tmp2; 1255 tmp12 = tmp1 - tmp2;
1270 } else { 1256 }
1271 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ 1257 } else {
1272 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1273 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1274
1275 tmp0 = d4 << CONST_BITS;
1276
1277 tmp10 = tmp0 + tmp3;
1278 tmp13 = tmp0 - tmp3;
1279 tmp11 = tmp2 - tmp0;
1280 tmp12 = -(tmp0 + tmp2);
1281 }
1282 }
1283 } else {
1284 if (d2) { 1258 if (d2) {
1285 if (d0) {
1286 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
1287 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1288 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1289 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1290
1291 tmp0 = d0 << CONST_BITS;
1292
1293 tmp10 = tmp0 + tmp3;
1294 tmp13 = tmp0 - tmp3;
1295 tmp11 = tmp0 + tmp2;
1296 tmp12 = tmp0 - tmp2;
1297 } else {
1298 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
1299 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1300 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1301 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1302
1303 tmp10 = tmp3;
1304 tmp13 = -tmp3;
1305 tmp11 = tmp2;
1306 tmp12 = -tmp2;
1307 }
1308 } else {
1309 if (d0) {
1310 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
1311 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1312 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1313
1314 tmp0 = d0 << CONST_BITS;
1315
1316 tmp10 = tmp0 + tmp3;
1317 tmp13 = tmp0 - tmp3;
1318 tmp11 = tmp0 + tmp2;
1319 tmp12 = tmp0 - tmp2;
1320 } else {
1321 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
1322 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1323 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1324
1325 tmp10 = tmp3;
1326 tmp13 = -tmp3;
1327 tmp11 = tmp2;
1328 tmp12 = -tmp2;
1329 }
1330 }
1331 }
1332 } else {
1333 if (d4) {
1334 if (d2) {
1335 if (d0) {
1336 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 1259 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1337 tmp2 = MULTIPLY(d2, FIX_0_541196100); 1260 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1338 tmp3 = MULTIPLY(d2, FIX_1_306562965); 1261 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1339 1262
1340 tmp0 = (d0 + d4) << CONST_BITS; 1263 tmp0 = (d0 + d4) << CONST_BITS;
1342 1265
1343 tmp10 = tmp0 + tmp3; 1266 tmp10 = tmp0 + tmp3;
1344 tmp13 = tmp0 - tmp3; 1267 tmp13 = tmp0 - tmp3;
1345 tmp11 = tmp1 + tmp2; 1268 tmp11 = tmp1 + tmp2;
1346 tmp12 = tmp1 - tmp2; 1269 tmp12 = tmp1 - tmp2;
1347 } else { 1270 } else {
1348 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
1349 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1350 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1351
1352 tmp0 = d4 << CONST_BITS;
1353
1354 tmp10 = tmp0 + tmp3;
1355 tmp13 = tmp0 - tmp3;
1356 tmp11 = tmp2 - tmp0;
1357 tmp12 = -(tmp0 + tmp2);
1358 }
1359 } else {
1360 if (d0) {
1361 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 1271 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1362 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 1272 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1363 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 1273 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1364 } else { 1274 }
1365 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
1366 tmp10 = tmp13 = d4 << CONST_BITS;
1367 tmp11 = tmp12 = -tmp10;
1368 }
1369 }
1370 } else {
1371 if (d2) {
1372 if (d0) {
1373 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
1374 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1375 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1376
1377 tmp0 = d0 << CONST_BITS;
1378
1379 tmp10 = tmp0 + tmp3;
1380 tmp13 = tmp0 - tmp3;
1381 tmp11 = tmp0 + tmp2;
1382 tmp12 = tmp0 - tmp2;
1383 } else {
1384 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
1385 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1386 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1387
1388 tmp10 = tmp3;
1389 tmp13 = -tmp3;
1390 tmp11 = tmp2;
1391 tmp12 = -tmp2;
1392 }
1393 } else {
1394 if (d0) {
1395 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1396 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1397 } else {
1398 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1399 tmp10 = tmp13 = tmp11 = tmp12 = 0;
1400 }
1401 }
1402 }
1403 } 1275 }
1404 1276
1405 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1277 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1406 1278
1407 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); 1279 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
1432 d6 = dataptr[DCTSTRIDE*3]; 1304 d6 = dataptr[DCTSTRIDE*3];
1433 1305
1434 /* Even part: reverse the even part of the forward DCT. */ 1306 /* Even part: reverse the even part of the forward DCT. */
1435 /* The rotator is sqrt(2)*c(-6). */ 1307 /* The rotator is sqrt(2)*c(-6). */
1436 if (d6) { 1308 if (d6) {
1437 if (d4) {
1438 if (d2) { 1309 if (d2) {
1439 if (d0) {
1440 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ 1310 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1441 z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 1311 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1442 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); 1312 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1443 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 1313 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1444 1314
1447 1317
1448 tmp10 = tmp0 + tmp3; 1318 tmp10 = tmp0 + tmp3;
1449 tmp13 = tmp0 - tmp3; 1319 tmp13 = tmp0 - tmp3;
1450 tmp11 = tmp1 + tmp2; 1320 tmp11 = tmp1 + tmp2;
1451 tmp12 = tmp1 - tmp2; 1321 tmp12 = tmp1 - tmp2;
1452 } else { 1322 } else {
1453 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
1454 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1455 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1456 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1457
1458 tmp0 = d4 << CONST_BITS;
1459
1460 tmp10 = tmp0 + tmp3;
1461 tmp13 = tmp0 - tmp3;
1462 tmp11 = tmp2 - tmp0;
1463 tmp12 = -(tmp0 + tmp2);
1464 }
1465 } else {
1466 if (d0) {
1467 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ 1323 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
1468 tmp2 = MULTIPLY(-d6, FIX_1_306562965); 1324 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1469 tmp3 = MULTIPLY(d6, FIX_0_541196100); 1325 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1470 1326
1471 tmp0 = (d0 + d4) << CONST_BITS; 1327 tmp0 = (d0 + d4) << CONST_BITS;
1473 1329
1474 tmp10 = tmp0 + tmp3; 1330 tmp10 = tmp0 + tmp3;
1475 tmp13 = tmp0 - tmp3; 1331 tmp13 = tmp0 - tmp3;
1476 tmp11 = tmp1 + tmp2; 1332 tmp11 = tmp1 + tmp2;
1477 tmp12 = tmp1 - tmp2; 1333 tmp12 = tmp1 - tmp2;
1478 } else { 1334 }
1479 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ 1335 } else {
1480 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1481 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1482
1483 tmp0 = d4 << CONST_BITS;
1484
1485 tmp10 = tmp0 + tmp3;
1486 tmp13 = tmp0 - tmp3;
1487 tmp11 = tmp2 - tmp0;
1488 tmp12 = -(tmp0 + tmp2);
1489 }
1490 }
1491 } else {
1492 if (d2) { 1336 if (d2) {
1493 if (d0) {
1494 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
1495 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1496 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1497 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1498
1499 tmp0 = d0 << CONST_BITS;
1500
1501 tmp10 = tmp0 + tmp3;
1502 tmp13 = tmp0 - tmp3;
1503 tmp11 = tmp0 + tmp2;
1504 tmp12 = tmp0 - tmp2;
1505 } else {
1506 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
1507 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1508 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1509 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1510
1511 tmp10 = tmp3;
1512 tmp13 = -tmp3;
1513 tmp11 = tmp2;
1514 tmp12 = -tmp2;
1515 }
1516 } else {
1517 if (d0) {
1518 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
1519 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1520 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1521
1522 tmp0 = d0 << CONST_BITS;
1523
1524 tmp10 = tmp0 + tmp3;
1525 tmp13 = tmp0 - tmp3;
1526 tmp11 = tmp0 + tmp2;
1527 tmp12 = tmp0 - tmp2;
1528 } else {
1529 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
1530 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1531 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1532
1533 tmp10 = tmp3;
1534 tmp13 = -tmp3;
1535 tmp11 = tmp2;
1536 tmp12 = -tmp2;
1537 }
1538 }
1539 }
1540 } else {
1541 if (d4) {
1542 if (d2) {
1543 if (d0) {
1544 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ 1337 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1545 tmp2 = MULTIPLY(d2, FIX_0_541196100); 1338 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1546 tmp3 = MULTIPLY(d2, FIX_1_306562965); 1339 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1547 1340
1548 tmp0 = (d0 + d4) << CONST_BITS; 1341 tmp0 = (d0 + d4) << CONST_BITS;
1550 1343
1551 tmp10 = tmp0 + tmp3; 1344 tmp10 = tmp0 + tmp3;
1552 tmp13 = tmp0 - tmp3; 1345 tmp13 = tmp0 - tmp3;
1553 tmp11 = tmp1 + tmp2; 1346 tmp11 = tmp1 + tmp2;
1554 tmp12 = tmp1 - tmp2; 1347 tmp12 = tmp1 - tmp2;
1555 } else { 1348 } else {
1556 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
1557 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1558 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1559
1560 tmp0 = d4 << CONST_BITS;
1561
1562 tmp10 = tmp0 + tmp3;
1563 tmp13 = tmp0 - tmp3;
1564 tmp11 = tmp2 - tmp0;
1565 tmp12 = -(tmp0 + tmp2);
1566 }
1567 } else {
1568 if (d0) {
1569 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ 1349 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1570 tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 1350 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1571 tmp11 = tmp12 = (d0 - d4) << CONST_BITS; 1351 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1572 } else { 1352 }
1573 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
1574 tmp10 = tmp13 = d4 << CONST_BITS;
1575 tmp11 = tmp12 = -tmp10;
1576 }
1577 }
1578 } else {
1579 if (d2) {
1580 if (d0) {
1581 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
1582 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1583 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1584
1585 tmp0 = d0 << CONST_BITS;
1586
1587 tmp10 = tmp0 + tmp3;
1588 tmp13 = tmp0 - tmp3;
1589 tmp11 = tmp0 + tmp2;
1590 tmp12 = tmp0 - tmp2;
1591 } else {
1592 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
1593 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1594 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1595
1596 tmp10 = tmp3;
1597 tmp13 = -tmp3;
1598 tmp11 = tmp2;
1599 tmp12 = -tmp2;
1600 }
1601 } else {
1602 if (d0) {
1603 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1604 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1605 } else {
1606 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1607 tmp10 = tmp13 = tmp11 = tmp12 = 0;
1608 }
1609 }
1610 }
1611 } 1353 }
1612 1354
1613 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1355 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1614 1356
1615 dataptr[DCTSTRIDE*0] = (DCTELEM) DESCALE(tmp10, 1357 dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
1616 CONST_BITS+PASS1_BITS+3); 1358 dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
1617 dataptr[DCTSTRIDE*1] = (DCTELEM) DESCALE(tmp11, 1359 dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
1618 CONST_BITS+PASS1_BITS+3); 1360 dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
1619 dataptr[DCTSTRIDE*2] = (DCTELEM) DESCALE(tmp12,
1620 CONST_BITS+PASS1_BITS+3);
1621 dataptr[DCTSTRIDE*3] = (DCTELEM) DESCALE(tmp13,
1622 CONST_BITS+PASS1_BITS+3);
1623 1361
1624 dataptr++; /* advance pointer to next column */ 1362 dataptr++; /* advance pointer to next column */
1625 } 1363 }
1626 } 1364 }
1627 1365