comparison jrevdct.c @ 2256:7e0b2e86afa9 libavcodec

1/2 resolution decoding
author michael
date Sat, 25 Sep 2004 23:18:58 +0000
parents 1e39f273ecd6
children 5f64a30339e5
comparison
equal deleted inserted replaced
2255:507690ff49a2 2256:7e0b2e86afa9
1170 1170
1171 dataptr++; /* advance pointer to next column */ 1171 dataptr++; /* advance pointer to next column */
1172 } 1172 }
1173 } 1173 }
1174 1174
1175 #undef DCTSIZE
1176 #define DCTSIZE 4
1177 #define DCTSTRIDE 8
1178
1179 void j_rev_dct4(DCTBLOCK data)
1180 {
1181 int32_t tmp0, tmp1, tmp2, tmp3;
1182 int32_t tmp10, tmp11, tmp12, tmp13;
1183 int32_t z1;
1184 int32_t d0, d2, d4, d6;
1185 register DCTELEM *dataptr;
1186 int rowctr;
1187
1188 /* Pass 1: process rows. */
1189 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
1190 /* furthermore, we scale the results by 2**PASS1_BITS. */
1191
1192 dataptr = data;
1193
1194 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
1195 /* Due to quantization, we will usually find that many of the input
1196 * coefficients are zero, especially the AC terms. We can exploit this
1197 * by short-circuiting the IDCT calculation for any row in which all
1198 * the AC terms are zero. In that case each output is equal to the
1199 * DC coefficient (with scale factor as needed).
1200 * With typical images and quantization tables, half or more of the
1201 * row DCT calculations can be simplified this way.
1202 */
1203
1204 register int *idataptr = (int*)dataptr;
1205
1206 /* WARNING: we do the same permutation as MMX idct to simplify the
1207 video core */
1208 d0 = dataptr[0];
1209 d2 = dataptr[1];
1210 d4 = dataptr[2];
1211 d6 = dataptr[3];
1212
1213 if ((d2 | d4 | d6) == 0) {
1214 /* AC terms all zero */
1215 if (d0) {
1216 /* Compute a 32 bit value to assign. */
1217 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
1218 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
1219
1220 idataptr[0] = v;
1221 idataptr[1] = v;
1222 }
1223
1224 dataptr += DCTSTRIDE; /* advance pointer to next row */
1225 continue;
1226 }
1227
1228 /* Even part: reverse the even part of the forward DCT. */
1229 /* The rotator is sqrt(2)*c(-6). */
1230 if (d6) {
1231 if (d4) {
1232 if (d2) {
1233 if (d0) {
1234 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1235 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1236 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1237 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1238
1239 tmp0 = (d0 + d4) << CONST_BITS;
1240 tmp1 = (d0 - d4) << CONST_BITS;
1241
1242 tmp10 = tmp0 + tmp3;
1243 tmp13 = tmp0 - tmp3;
1244 tmp11 = tmp1 + tmp2;
1245 tmp12 = tmp1 - tmp2;
1246 } else {
1247 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
1248 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1249 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1250 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1251
1252 tmp0 = d4 << CONST_BITS;
1253
1254 tmp10 = tmp0 + tmp3;
1255 tmp13 = tmp0 - tmp3;
1256 tmp11 = tmp2 - tmp0;
1257 tmp12 = -(tmp0 + tmp2);
1258 }
1259 } else {
1260 if (d0) {
1261 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
1262 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1263 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1264
1265 tmp0 = (d0 + d4) << CONST_BITS;
1266 tmp1 = (d0 - d4) << CONST_BITS;
1267
1268 tmp10 = tmp0 + tmp3;
1269 tmp13 = tmp0 - tmp3;
1270 tmp11 = tmp1 + tmp2;
1271 tmp12 = tmp1 - tmp2;
1272 } else {
1273 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
1274 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1275 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1276
1277 tmp0 = d4 << CONST_BITS;
1278
1279 tmp10 = tmp0 + tmp3;
1280 tmp13 = tmp0 - tmp3;
1281 tmp11 = tmp2 - tmp0;
1282 tmp12 = -(tmp0 + tmp2);
1283 }
1284 }
1285 } else {
1286 if (d2) {
1287 if (d0) {
1288 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
1289 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1290 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1291 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1292
1293 tmp0 = d0 << CONST_BITS;
1294
1295 tmp10 = tmp0 + tmp3;
1296 tmp13 = tmp0 - tmp3;
1297 tmp11 = tmp0 + tmp2;
1298 tmp12 = tmp0 - tmp2;
1299 } else {
1300 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
1301 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1302 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1303 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1304
1305 tmp10 = tmp3;
1306 tmp13 = -tmp3;
1307 tmp11 = tmp2;
1308 tmp12 = -tmp2;
1309 }
1310 } else {
1311 if (d0) {
1312 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
1313 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1314 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1315
1316 tmp0 = d0 << CONST_BITS;
1317
1318 tmp10 = tmp0 + tmp3;
1319 tmp13 = tmp0 - tmp3;
1320 tmp11 = tmp0 + tmp2;
1321 tmp12 = tmp0 - tmp2;
1322 } else {
1323 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
1324 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1325 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1326
1327 tmp10 = tmp3;
1328 tmp13 = -tmp3;
1329 tmp11 = tmp2;
1330 tmp12 = -tmp2;
1331 }
1332 }
1333 }
1334 } else {
1335 if (d4) {
1336 if (d2) {
1337 if (d0) {
1338 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1339 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1340 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1341
1342 tmp0 = (d0 + d4) << CONST_BITS;
1343 tmp1 = (d0 - d4) << CONST_BITS;
1344
1345 tmp10 = tmp0 + tmp3;
1346 tmp13 = tmp0 - tmp3;
1347 tmp11 = tmp1 + tmp2;
1348 tmp12 = tmp1 - tmp2;
1349 } else {
1350 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
1351 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1352 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1353
1354 tmp0 = d4 << CONST_BITS;
1355
1356 tmp10 = tmp0 + tmp3;
1357 tmp13 = tmp0 - tmp3;
1358 tmp11 = tmp2 - tmp0;
1359 tmp12 = -(tmp0 + tmp2);
1360 }
1361 } else {
1362 if (d0) {
1363 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1364 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1365 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1366 } else {
1367 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
1368 tmp10 = tmp13 = d4 << CONST_BITS;
1369 tmp11 = tmp12 = -tmp10;
1370 }
1371 }
1372 } else {
1373 if (d2) {
1374 if (d0) {
1375 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
1376 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1377 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1378
1379 tmp0 = d0 << CONST_BITS;
1380
1381 tmp10 = tmp0 + tmp3;
1382 tmp13 = tmp0 - tmp3;
1383 tmp11 = tmp0 + tmp2;
1384 tmp12 = tmp0 - tmp2;
1385 } else {
1386 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
1387 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1388 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1389
1390 tmp10 = tmp3;
1391 tmp13 = -tmp3;
1392 tmp11 = tmp2;
1393 tmp12 = -tmp2;
1394 }
1395 } else {
1396 if (d0) {
1397 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1398 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1399 } else {
1400 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1401 tmp10 = tmp13 = tmp11 = tmp12 = 0;
1402 }
1403 }
1404 }
1405 }
1406
1407 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1408
1409 dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
1410 dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
1411 dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
1412 dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
1413
1414 dataptr += DCTSTRIDE; /* advance pointer to next row */
1415 }
1416
1417 /* Pass 2: process columns. */
1418 /* Note that we must descale the results by a factor of 8 == 2**3, */
1419 /* and also undo the PASS1_BITS scaling. */
1420
1421 dataptr = data;
1422 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
1423 /* Columns of zeroes can be exploited in the same way as we did with rows.
1424 * However, the row calculation has created many nonzero AC terms, so the
1425 * simplification applies less often (typically 5% to 10% of the time).
1426 * On machines with very fast multiplication, it's possible that the
1427 * test takes more time than it's worth. In that case this section
1428 * may be commented out.
1429 */
1430
1431 d0 = dataptr[DCTSTRIDE*0];
1432 d2 = dataptr[DCTSTRIDE*1];
1433 d4 = dataptr[DCTSTRIDE*2];
1434 d6 = dataptr[DCTSTRIDE*3];
1435
1436 /* Even part: reverse the even part of the forward DCT. */
1437 /* The rotator is sqrt(2)*c(-6). */
1438 if (d6) {
1439 if (d4) {
1440 if (d2) {
1441 if (d0) {
1442 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
1443 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1444 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1445 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1446
1447 tmp0 = (d0 + d4) << CONST_BITS;
1448 tmp1 = (d0 - d4) << CONST_BITS;
1449
1450 tmp10 = tmp0 + tmp3;
1451 tmp13 = tmp0 - tmp3;
1452 tmp11 = tmp1 + tmp2;
1453 tmp12 = tmp1 - tmp2;
1454 } else {
1455 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
1456 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1457 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1458 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1459
1460 tmp0 = d4 << CONST_BITS;
1461
1462 tmp10 = tmp0 + tmp3;
1463 tmp13 = tmp0 - tmp3;
1464 tmp11 = tmp2 - tmp0;
1465 tmp12 = -(tmp0 + tmp2);
1466 }
1467 } else {
1468 if (d0) {
1469 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
1470 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1471 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1472
1473 tmp0 = (d0 + d4) << CONST_BITS;
1474 tmp1 = (d0 - d4) << CONST_BITS;
1475
1476 tmp10 = tmp0 + tmp3;
1477 tmp13 = tmp0 - tmp3;
1478 tmp11 = tmp1 + tmp2;
1479 tmp12 = tmp1 - tmp2;
1480 } else {
1481 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
1482 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1483 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1484
1485 tmp0 = d4 << CONST_BITS;
1486
1487 tmp10 = tmp0 + tmp3;
1488 tmp13 = tmp0 - tmp3;
1489 tmp11 = tmp2 - tmp0;
1490 tmp12 = -(tmp0 + tmp2);
1491 }
1492 }
1493 } else {
1494 if (d2) {
1495 if (d0) {
1496 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
1497 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1498 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1499 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1500
1501 tmp0 = d0 << CONST_BITS;
1502
1503 tmp10 = tmp0 + tmp3;
1504 tmp13 = tmp0 - tmp3;
1505 tmp11 = tmp0 + tmp2;
1506 tmp12 = tmp0 - tmp2;
1507 } else {
1508 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
1509 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
1510 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
1511 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
1512
1513 tmp10 = tmp3;
1514 tmp13 = -tmp3;
1515 tmp11 = tmp2;
1516 tmp12 = -tmp2;
1517 }
1518 } else {
1519 if (d0) {
1520 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
1521 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1522 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1523
1524 tmp0 = d0 << CONST_BITS;
1525
1526 tmp10 = tmp0 + tmp3;
1527 tmp13 = tmp0 - tmp3;
1528 tmp11 = tmp0 + tmp2;
1529 tmp12 = tmp0 - tmp2;
1530 } else {
1531 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
1532 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
1533 tmp3 = MULTIPLY(d6, FIX_0_541196100);
1534
1535 tmp10 = tmp3;
1536 tmp13 = -tmp3;
1537 tmp11 = tmp2;
1538 tmp12 = -tmp2;
1539 }
1540 }
1541 }
1542 } else {
1543 if (d4) {
1544 if (d2) {
1545 if (d0) {
1546 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
1547 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1548 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1549
1550 tmp0 = (d0 + d4) << CONST_BITS;
1551 tmp1 = (d0 - d4) << CONST_BITS;
1552
1553 tmp10 = tmp0 + tmp3;
1554 tmp13 = tmp0 - tmp3;
1555 tmp11 = tmp1 + tmp2;
1556 tmp12 = tmp1 - tmp2;
1557 } else {
1558 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
1559 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1560 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1561
1562 tmp0 = d4 << CONST_BITS;
1563
1564 tmp10 = tmp0 + tmp3;
1565 tmp13 = tmp0 - tmp3;
1566 tmp11 = tmp2 - tmp0;
1567 tmp12 = -(tmp0 + tmp2);
1568 }
1569 } else {
1570 if (d0) {
1571 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
1572 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
1573 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
1574 } else {
1575 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
1576 tmp10 = tmp13 = d4 << CONST_BITS;
1577 tmp11 = tmp12 = -tmp10;
1578 }
1579 }
1580 } else {
1581 if (d2) {
1582 if (d0) {
1583 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
1584 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1585 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1586
1587 tmp0 = d0 << CONST_BITS;
1588
1589 tmp10 = tmp0 + tmp3;
1590 tmp13 = tmp0 - tmp3;
1591 tmp11 = tmp0 + tmp2;
1592 tmp12 = tmp0 - tmp2;
1593 } else {
1594 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
1595 tmp2 = MULTIPLY(d2, FIX_0_541196100);
1596 tmp3 = MULTIPLY(d2, FIX_1_306562965);
1597
1598 tmp10 = tmp3;
1599 tmp13 = -tmp3;
1600 tmp11 = tmp2;
1601 tmp12 = -tmp2;
1602 }
1603 } else {
1604 if (d0) {
1605 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
1606 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
1607 } else {
1608 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
1609 tmp10 = tmp13 = tmp11 = tmp12 = 0;
1610 }
1611 }
1612 }
1613 }
1614
1615 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1616
1617 dataptr[DCTSTRIDE*0] = (DCTELEM) DESCALE(tmp10,
1618 CONST_BITS+PASS1_BITS+3);
1619 dataptr[DCTSTRIDE*1] = (DCTELEM) DESCALE(tmp11,
1620 CONST_BITS+PASS1_BITS+3);
1621 dataptr[DCTSTRIDE*2] = (DCTELEM) DESCALE(tmp12,
1622 CONST_BITS+PASS1_BITS+3);
1623 dataptr[DCTSTRIDE*3] = (DCTELEM) DESCALE(tmp13,
1624 CONST_BITS+PASS1_BITS+3);
1625
1626 dataptr++; /* advance pointer to next column */
1627 }
1628 }
1629
1630
1175 #undef FIX 1631 #undef FIX
1176 #undef CONST_BITS 1632 #undef CONST_BITS