From e46e300de55aa35279935ef9e8572949d725d833 Mon Sep 17 00:00:00 2001
From: Even Rouault
Date: Wed, 20 May 2020 11:36:05 +0200
Subject: [PATCH] opj_dwt_encode_1_real(): avoid many bound comparisons, similarly to decoding side

---
 src/lib/openjp2/dwt.c | 108 ++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 45 deletions(-)

diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
index bdc91cf5..a825f013 100644
--- a/src/lib/openjp2/dwt.c
+++ b/src/lib/openjp2/dwt.c
@@ -1040,59 +1040,77 @@ static void opj_idwt53_v(const opj_dwt_t *dwt,
 #endif
 }
 
+static void opj_dwt_encode_step1(OPJ_FLOAT32* fw,
+                                 OPJ_UINT32 start,
+                                 OPJ_UINT32 end,
+                                 const OPJ_FLOAT32 c)
+{
+    OPJ_UINT32 i;
+    for (i = start; i < end; ++i) {
+        fw[i * 2] *= c;
+    }
+}
+static void opj_dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw,
+                                 OPJ_UINT32 start,
+                                 OPJ_UINT32 end,
+                                 OPJ_UINT32 m,
+                                 OPJ_FLOAT32 c)
+{
+    OPJ_UINT32 i;
+    OPJ_UINT32 imax = opj_uint_min(end, m);
+    if (start > 0) {
+        fw += 2 * start;
+        fl = fw - 2;
+    }
+    for (i = start; i < imax; ++i) {
+        fw[-1] += (fl[0] + fw[0]) * c;
+        fl = fw;
+        fw += 2;
+    }
+    if (m < end) {
+        assert(m + 1 == end);
+        fw[-1] += (2 * fl[0]) * c;
+    }
+}
 
-/* */
-/* Forward 9-7 wavelet transform in 1-D. */
-/* */
 static void opj_dwt_encode_1_real(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn,
                                   OPJ_INT32 cas)
 {
-    OPJ_INT32 i;
-    OPJ_FLOAT32* a = (OPJ_FLOAT32*)aIn;
-
-    if (!cas) {
-        if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */
-            for (i = 0; i < dn; i++) {
-                OPJ_D(i) += opj_dwt_alpha * (OPJ_S_(i) + OPJ_S_(i + 1));
-            }
-            for (i = 0; i < sn; i++) {
-                OPJ_S(i) += opj_dwt_beta * (OPJ_D_(i - 1) + OPJ_D_(i));
-            }
-            for (i = 0; i < dn; i++) {
-                OPJ_D(i) += opj_dwt_gamma * (OPJ_S_(i) + OPJ_S_(i + 1));
-            }
-            for (i = 0; i < sn; i++) {
-                OPJ_S(i) += opj_dwt_delta * (OPJ_D_(i - 1) + OPJ_D_(i));
-            }
-            for (i = 0; i < dn; i++) {
-                OPJ_D(i) = opj_K / 2 * OPJ_D(i);
-            }
-            for (i = 0; i < sn; i++) {
-                OPJ_S(i) = opj_c13318 / 2 * OPJ_S(i);
-            }
+    OPJ_FLOAT32* w = (OPJ_FLOAT32*)aIn;
+    OPJ_INT32 a, b;
+    if (cas == 0) {
+        if (!((dn > 0) || (sn > 1))) {
+            return;
         }
+        a = 0;
+        b = 1;
     } else {
-        if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */
-            for (i = 0; i < dn; i++) {
-                OPJ_S(i) += opj_dwt_alpha * (OPJ_DD_(i) + OPJ_DD_(i - 1));
-            }
-            for (i = 0; i < sn; i++) {
-                OPJ_D(i) += opj_dwt_beta * (OPJ_SS_(i) + OPJ_SS_(i + 1));
-            }
-            for (i = 0; i < dn; i++) {
-                OPJ_S(i) += opj_dwt_gamma * (OPJ_DD_(i) + OPJ_DD_(i - 1));
-            }
-            for (i = 0; i < sn; i++) {
-                OPJ_D(i) += opj_dwt_delta * (OPJ_SS_(i) + OPJ_SS_(i + 1));
-            }
-            for (i = 0; i < dn; i++) {
-                OPJ_S(i) = opj_K / 2 * OPJ_S(i);
-            }
-            for (i = 0; i < sn; i++) {
-                OPJ_D(i) = opj_c13318 / 2 * OPJ_D(i);
-            }
+        if (!((sn > 0) || (dn > 1))) {
+            return;
         }
+        a = 1;
+        b = 0;
     }
+    opj_dwt_encode_step2(w + a, w + b + 1,
+                         0, (OPJ_UINT32)dn,
+                         (OPJ_UINT32)opj_int_min(dn, sn - b),
+                         opj_dwt_alpha);
+    opj_dwt_encode_step2(w + b, w + a + 1,
+                         0, (OPJ_UINT32)sn,
+                         (OPJ_UINT32)opj_int_min(sn, dn - a),
+                         opj_dwt_beta);
+    opj_dwt_encode_step2(w + a, w + b + 1,
+                         0, (OPJ_UINT32)dn,
+                         (OPJ_UINT32)opj_int_min(dn, sn - b),
+                         opj_dwt_gamma);
+    opj_dwt_encode_step2(w + b, w + a + 1,
+                         0, (OPJ_UINT32)sn,
+                         (OPJ_UINT32)opj_int_min(sn, dn - a),
+                         opj_dwt_delta);
+    opj_dwt_encode_step1(w + b, 0, (OPJ_UINT32)dn,
+                         opj_K / 2);
+    opj_dwt_encode_step1(w + a, 0, (OPJ_UINT32)sn,
+                         opj_c13318 / 2);
 }
 
 static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,