Forward DWT 5x3: performance improvements in horizontal pass, and modest in vertical pass

This commit is contained in:
Even Rouault 2020-05-22 15:03:40 +02:00
parent bd5f5ee7de
commit 97b384aecd
No known key found for this signature in database
GPG Key ID: 33EBBFC47B3DD87D
1 changed files with 66 additions and 14 deletions

View File

@ -339,23 +339,37 @@ static void opj_dwt_encode_1(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn,
OPJ_INT32* a = (OPJ_INT32*)aIn; OPJ_INT32* a = (OPJ_INT32*)aIn;
if (!cas) { if (!cas) {
if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ if (sn + dn > 1) {
for (i = 0; i < dn; i++) { for (i = 0; i < sn - 1; i++) {
OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; OPJ_D(i) -= (OPJ_S(i) + OPJ_S(i + 1)) >> 1;
} }
for (i = 0; i < sn; i++) { if (((sn + dn) % 2) == 0) {
OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; OPJ_D(i) -= OPJ_S(i);
}
OPJ_S(0) += (OPJ_D(0) + OPJ_D(0) + 2) >> 2;
for (i = 1; i < dn; i++) {
OPJ_S(i) += (OPJ_D(i - 1) + OPJ_D(i) + 2) >> 2;
}
if (((sn + dn) % 2) == 1) {
OPJ_S(i) += (OPJ_D(i - 1) + OPJ_D(i - 1) + 2) >> 2;
} }
} }
} else { } else {
if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */ if (sn + dn == 1) {
OPJ_S(0) *= 2; a[0] *= 2;
} else { } else {
for (i = 0; i < dn; i++) { OPJ_S(0) -= OPJ_D(0);
OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; for (i = 1; i < sn; i++) {
OPJ_S(i) -= (OPJ_D(i) + OPJ_D(i - 1)) >> 1;
} }
for (i = 0; i < sn; i++) { if (((sn + dn) % 2) == 1) {
OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; OPJ_S(i) -= OPJ_D(i - 1);
}
for (i = 0; i < dn - 1; i++) {
OPJ_D(i) += (OPJ_S(i) + OPJ_S(i + 1) + 2) >> 2;
}
if (((sn + dn) % 2) == 0) {
OPJ_D(i) += (OPJ_S(i) + OPJ_S(i) + 2) >> 2;
} }
} }
} }
@ -1122,9 +1136,47 @@ void opj_dwt_encode_and_deinterleave_h_one_row(void* rowIn,
OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn; OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn;
const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1); const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1);
const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn); const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn);
memcpy(tmp, row, width * sizeof(OPJ_INT32));
opj_dwt_encode_1(tmp, dn, sn, even ? 0 : 1); if (even) {
opj_dwt_deinterleave_h(tmp, row, dn, sn, even ? 0 : 1); if (width > 1) {
OPJ_INT32 i;
for (i = 0; i < sn - 1; i++) {
tmp[sn + i] = row[2 * i + 1] - ((row[(i) * 2] + row[(i + 1) * 2]) >> 1);
}
if ((width % 2) == 0) {
tmp[sn + i] = row[2 * i + 1] - row[(i) * 2];
}
row[0] += (tmp[sn] + tmp[sn] + 2) >> 2;
for (i = 1; i < dn; i++) {
row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + i] + 2) >> 2);
}
if ((width % 2) == 1) {
row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + (i - 1)] + 2) >> 2);
}
memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
}
} else {
if (width == 1) {
row[0] *= 2;
} else {
OPJ_INT32 i;
tmp[sn + 0] = row[0] - row[1];
for (i = 1; i < sn; i++) {
tmp[sn + i] = row[2 * i] - ((row[2 * i + 1] + row[2 * (i - 1) + 1]) >> 1);
}
if ((width % 2) == 1) {
tmp[sn + i] = row[2 * i] - row[2 * (i - 1) + 1];
}
for (i = 0; i < dn - 1; i++) {
row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i + 1] + 2) >> 2);
}
if ((width % 2) == 0) {
row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i] + 2) >> 2);
}
memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
}
}
} }
/** Process one line for the horizontal pass of the 9x7 forward transform */ /** Process one line for the horizontal pass of the 9x7 forward transform */