diff --git a/libopenjpeg/dwt.c b/libopenjpeg/dwt.c index cb827dbb..fc54c568 100644 --- a/libopenjpeg/dwt.c +++ b/libopenjpeg/dwt.c @@ -106,6 +106,7 @@ static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 Inverse 5-3 wavelet transform in 1-D */ static void opj_dwt_decode_1(dwt_t *v); +static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); /** Forward 9-7 wavelet transform in 1-D */ @@ -122,7 +123,25 @@ static opj_bool opj_dwt_decode_tile(opj_tcd_tilecomp_v2_t* tilec, OPJ_UINT32 i, static opj_bool opj_dwt_encode_procedure( opj_tcd_tilecomp_v2_t * tilec, void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) ); -static OPJ_UINT32 dwt_max_resolution_v2(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i); +static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i); + +/* */ +/* Inverse 9-7 wavelet transform in 1-D. */ +/* */ +static void opj_v4dwt_decode(v4dwt_t* restrict dwt); + +static void opj_v4dwt_interleave_h(v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); + +static void opj_v4dwt_interleave_v(v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); + +#ifdef __SSE__ +static void opj_v4dwt_decode_step1_sse(v4* w, OPJ_INT32 count, const __m128 c); + +static void opj_v4dwt_decode_step2_sse(v4* l, v4* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); +#endif + +static void opj_v4dwt_decode_step1(v4* w, OPJ_INT32 count, const OPJ_FLOAT32 c); +static void opj_v4dwt_decode_step2(v4* l, v4* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); /*@}*/ @@ -140,7 +159,7 @@ static OPJ_UINT32 dwt_max_resolution_v2(opj_tcd_resolution_v2_t* restrict r, OPJ /* */ /* This table contains the norms of the 5-3 wavelets for different bands. 
*/ /* */ -static const double opj_dwt_norms[4][10] = { +static const OPJ_FLOAT64 opj_dwt_norms[4][10] = { {1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3}, {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, @@ -150,7 +169,7 @@ static const double opj_dwt_norms[4][10] = { /* */ /* This table contains the norms of the 9-7 wavelets for different bands. */ /* */ -static const double dwt_norms_real[4][10] = { +static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = { {1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9}, {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, @@ -277,7 +296,7 @@ void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { /* */ /* Inverse 5-3 wavelet transform in 1-D. */ /* */ -static void dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { +static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { OPJ_INT32 i; if (!cas) { @@ -299,7 +318,7 @@ static void dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 ca /* Inverse 5-3 wavelet transform in 1-D. */ /* */ static void opj_dwt_decode_1(dwt_t *v) { - dwt_decode_1_(v->mem, v->dn, v->sn, v->cas); + opj_dwt_decode_1_(v->mem, v->dn, v->sn, v->cas); } /* */ @@ -383,7 +402,7 @@ INLINE opj_bool opj_dwt_encode_procedure(opj_tcd_tilecomp_v2_t * tilec,void (*p_ rw = l_cur_res->x1 - l_cur_res->x0; rh = l_cur_res->y1 - l_cur_res->y0; - l_data_size = dwt_max_resolution_v2( tilec->resolutions,tilec->numresolutions) * sizeof(OPJ_INT32); + l_data_size = opj_dwt_max_resolution( tilec->resolutions,tilec->numresolutions) * sizeof(OPJ_INT32); bj = (OPJ_INT32*)opj_malloc(l_data_size); if (! bj) { return OPJ_FALSE; @@ -467,14 +486,14 @@ OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) { /* Get norm of 5-3 wavelet. 
*/ /* */ double dwt_getnorm(int level, int orient) { - return dwt_norms[orient][level]; + return opj_dwt_norms[orient][level]; } /* */ /* Get norm of 5-3 wavelet. */ /* */ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) { - return dwt_norms[orient][level]; + return opj_dwt_norms[orient][level]; } /* */ @@ -497,14 +516,14 @@ OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) { /* Get norm of 9-7 wavelet. */ /* */ double dwt_getnorm_real(int level, int orient) { - return dwt_norms_real[orient][level]; + return opj_dwt_norms_real[orient][level]; } /* */ /* Get norm of 9-7 wavelet. */ /* */ OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient) { - return dwt_norms_real[orient][level]; + return opj_dwt_norms_real[orient][level]; } void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) { @@ -521,7 +540,7 @@ void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) { if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) { stepsize = 1.0; } else { - OPJ_FLOAT64 norm = dwt_norms_real[orient][level]; + OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level]; stepsize = (1 << (gain)) / norm; } opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), prec + gain, &tccp->stepsizes[bandno]); @@ -564,7 +583,7 @@ static OPJ_UINT32 dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT3 /* */ /* Determine maximum computed resolution level for inverse wavelet transform */ /* */ -static OPJ_UINT32 dwt_max_resolution_v2(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i) { +static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_v2_t* restrict r, OPJ_UINT32 i) { OPJ_UINT32 mr = 0; OPJ_UINT32 w; while( --i ) { @@ -592,7 +611,7 @@ static opj_bool opj_dwt_decode_tile(opj_tcd_tilecomp_v2_t* tilec, OPJ_UINT32 num OPJ_UINT32 w = tilec->x1 - tilec->x0; h.mem = (OPJ_INT32*) - opj_aligned_malloc(dwt_max_resolution_v2(tr, numres) * sizeof(OPJ_INT32)); + opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32)); 
if (! h.mem) { @@ -637,16 +656,16 @@ static opj_bool opj_dwt_decode_tile(opj_tcd_tilecomp_v2_t* tilec, OPJ_UINT32 num return OPJ_TRUE; } -static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, int size){ - float* restrict bi = (float*) (w->wavelet + w->cas); - int count = w->sn; - int i, k; +void opj_v4dwt_interleave_h(v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){ + OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas); + OPJ_INT32 count = w->sn; + OPJ_INT32 i, k; for(k = 0; k < 2; ++k){ if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) { /* Fast code path */ for(i = 0; i < count; ++i){ - int j = i; + OPJ_INT32 j = i; bi[i*8 ] = a[j]; j += x; bi[i*8 + 1] = a[j]; @@ -659,7 +678,7 @@ static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, in else { /* Slow code path */ for(i = 0; i < count; ++i){ - int j = i; + OPJ_INT32 j = i; bi[i*8 ] = a[j]; j += x; if(j >= size) continue; @@ -680,27 +699,27 @@ static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, in } } -static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x, int nb_elts_read){ +void opj_v4dwt_interleave_v(v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){ v4* restrict bi = v->wavelet + v->cas; - int i; + OPJ_INT32 i; for(i = 0; i < v->sn; ++i){ - memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(float)); + memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(OPJ_FLOAT32)); } a += v->sn * x; bi = v->wavelet + 1 - v->cas; for(i = 0; i < v->dn; ++i){ - memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(float)); + memcpy(&bi[i*2], &a[i*x], nb_elts_read * sizeof(OPJ_FLOAT32)); } } #ifdef __SSE__ -static void v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){ +void opj_v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){ __m128* restrict vw = (__m128*) w; - int i; + OPJ_INT32 i; /* 4x unrolled loop 
*/ for(i = 0; i < count >> 2; ++i){ *vw = _mm_mul_ps(*vw, c); @@ -719,7 +738,7 @@ static void v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){ } } -static void v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){ +void opj_v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){ __m128* restrict vl = (__m128*) l; __m128* restrict vw = (__m128*) w; int i; @@ -747,14 +766,14 @@ static void v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){ #else -static void v4dwt_decode_step1(v4* w, int count, const float c){ - float* restrict fw = (float*) w; - int i; +void opj_v4dwt_decode_step1(v4* w, OPJ_INT32 count, const OPJ_FLOAT32 c){ + OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; + OPJ_INT32 i; for(i = 0; i < count; ++i){ - float tmp1 = fw[i*8 ]; - float tmp2 = fw[i*8 + 1]; - float tmp3 = fw[i*8 + 2]; - float tmp4 = fw[i*8 + 3]; + OPJ_FLOAT32 tmp1 = fw[i*8 ]; + OPJ_FLOAT32 tmp2 = fw[i*8 + 1]; + OPJ_FLOAT32 tmp3 = fw[i*8 + 2]; + OPJ_FLOAT32 tmp4 = fw[i*8 + 3]; fw[i*8 ] = tmp1 * c; fw[i*8 + 1] = tmp2 * c; fw[i*8 + 2] = tmp3 * c; @@ -762,23 +781,23 @@ static void v4dwt_decode_step1(v4* w, int count, const float c){ } } -static void v4dwt_decode_step2(v4* l, v4* w, int k, int m, float c){ - float* restrict fl = (float*) l; - float* restrict fw = (float*) w; +void opj_v4dwt_decode_step2(v4* l, v4* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c){ + OPJ_FLOAT32* restrict fl = (OPJ_FLOAT32*) l; + OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; int i; for(i = 0; i < m; ++i){ - float tmp1_1 = fl[0]; - float tmp1_2 = fl[1]; - float tmp1_3 = fl[2]; - float tmp1_4 = fl[3]; - float tmp2_1 = fw[-4]; - float tmp2_2 = fw[-3]; - float tmp2_3 = fw[-2]; - float tmp2_4 = fw[-1]; - float tmp3_1 = fw[0]; - float tmp3_2 = fw[1]; - float tmp3_3 = fw[2]; - float tmp3_4 = fw[3]; + OPJ_FLOAT32 tmp1_1 = fl[0]; + OPJ_FLOAT32 tmp1_2 = fl[1]; + OPJ_FLOAT32 tmp1_3 = fl[2]; + OPJ_FLOAT32 tmp1_4 = fl[3]; + OPJ_FLOAT32 tmp2_1 = fw[-4]; + OPJ_FLOAT32 tmp2_2 = fw[-3]; + OPJ_FLOAT32 
tmp2_3 = fw[-2]; + OPJ_FLOAT32 tmp2_4 = fw[-1]; + OPJ_FLOAT32 tmp3_1 = fw[0]; + OPJ_FLOAT32 tmp3_2 = fw[1]; + OPJ_FLOAT32 tmp3_3 = fw[2]; + OPJ_FLOAT32 tmp3_4 = fw[3]; fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); @@ -787,20 +806,20 @@ static void v4dwt_decode_step2(v4* l, v4* w, int k, int m, float c){ fw += 8; } if(m < k){ - float c1; - float c2; - float c3; - float c4; + OPJ_FLOAT32 c1; + OPJ_FLOAT32 c2; + OPJ_FLOAT32 c3; + OPJ_FLOAT32 c4; c += c; c1 = fl[0] * c; c2 = fl[1] * c; c3 = fl[2] * c; c4 = fl[3] * c; for(; m < k; ++m){ - float tmp1 = fw[-4]; - float tmp2 = fw[-3]; - float tmp3 = fw[-2]; - float tmp4 = fw[-1]; + OPJ_FLOAT32 tmp1 = fw[-4]; + OPJ_FLOAT32 tmp2 = fw[-3]; + OPJ_FLOAT32 tmp3 = fw[-2]; + OPJ_FLOAT32 tmp4 = fw[-1]; fw[-4] = tmp1 + c1; fw[-3] = tmp2 + c2; fw[-2] = tmp3 + c3; @@ -815,7 +834,7 @@ static void v4dwt_decode_step2(v4* l, v4* w, int k, int m, float c){ /* */ /* Inverse 9-7 wavelet transform in 1-D. 
*/ /* */ -static void v4dwt_decode(v4dwt_t* restrict dwt){ +void opj_v4dwt_decode(v4dwt_t* restrict dwt){ int a, b; if(dwt->cas == 0) { if(!((dwt->dn > 0) || (dwt->sn > 1))){ @@ -831,19 +850,19 @@ static void v4dwt_decode(v4dwt_t* restrict dwt){ b = 0; } #ifdef __SSE__ - v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); - v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); - v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); - v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); - v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); - v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); + opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); + opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); + opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); + opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); + opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); + opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); #else - v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); - v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); - v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); - v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma); - v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, 
int_min(dwt->sn, dwt->dn-a), opj_dwt_beta); - v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha); + opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); + opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); + opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); + opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma); + opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), opj_dwt_beta); + opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha); #endif } @@ -885,8 +904,8 @@ opj_bool dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){ for(j = rh; j > 3; j -= 4){ int k; - v4dwt_interleave_h(&h, aj, w, bufsize); - v4dwt_decode(&h); + opj_v4dwt_interleave_h(&h, aj, w, bufsize); + opj_v4dwt_decode(&h); for(k = rw; --k >= 0;){ aj[k ] = h.wavelet[k].f[0]; aj[k+w ] = h.wavelet[k].f[1]; @@ -899,8 +918,8 @@ opj_bool dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){ if (rh & 0x03) { int k; j = rh & 0x03; - v4dwt_interleave_h(&h, aj, w, bufsize); - v4dwt_decode(&h); + opj_v4dwt_interleave_h(&h, aj, w, bufsize); + opj_v4dwt_decode(&h); for(k = rw; --k >= 0;){ switch(j) { case 3: aj[k+w*2] = h.wavelet[k].f[2]; @@ -916,8 +935,8 @@ opj_bool dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){ aj = (float*) tilec->data; for(j = rw; j > 3; j -= 4){ int k; - v4dwt_interleave_v(&v, aj, w, 4); - v4dwt_decode(&v); + opj_v4dwt_interleave_v(&v, aj, w, 4); + opj_v4dwt_decode(&v); for(k = 0; k < rh; ++k){ memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(float)); } @@ -926,8 +945,8 @@ opj_bool dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){ if (rw & 0x03){ int k; j = rw & 0x03; - v4dwt_interleave_v(&v, aj, w, j); - v4dwt_decode(&v); + opj_v4dwt_interleave_v(&v, aj, w, j); 
+ opj_v4dwt_decode(&v); for(k = 0; k < rh; ++k){ memcpy(&aj[k*w], &v.wavelet[k], j * sizeof(float)); } @@ -953,7 +972,7 @@ opj_bool dwt_decode_real_v2(opj_tcd_tilecomp_v2_t* restrict tilec, OPJ_UINT32 nu OPJ_UINT32 w = tilec->x1 - tilec->x0; - h.wavelet = (v4*) opj_aligned_malloc((dwt_max_resolution_v2(res, numres)+5) * sizeof(v4)); + h.wavelet = (v4*) opj_aligned_malloc((opj_dwt_max_resolution(res, numres)+5) * sizeof(v4)); v.wavelet = h.wavelet; while( --numres) { @@ -974,8 +993,8 @@ opj_bool dwt_decode_real_v2(opj_tcd_tilecomp_v2_t* restrict tilec, OPJ_UINT32 nu for(j = rh; j > 3; j -= 4) { OPJ_INT32 k; - v4dwt_interleave_h(&h, aj, w, bufsize); - v4dwt_decode(&h); + opj_v4dwt_interleave_h(&h, aj, w, bufsize); + opj_v4dwt_decode(&h); for(k = rw; --k >= 0;){ aj[k ] = h.wavelet[k].f[0]; @@ -991,8 +1010,8 @@ opj_bool dwt_decode_real_v2(opj_tcd_tilecomp_v2_t* restrict tilec, OPJ_UINT32 nu if (rh & 0x03) { OPJ_INT32 k; j = rh & 0x03; - v4dwt_interleave_h(&h, aj, w, bufsize); - v4dwt_decode(&h); + opj_v4dwt_interleave_h(&h, aj, w, bufsize); + opj_v4dwt_decode(&h); for(k = rw; --k >= 0;){ switch(j) { case 3: aj[k+w*2] = h.wavelet[k].f[2]; @@ -1009,8 +1028,8 @@ opj_bool dwt_decode_real_v2(opj_tcd_tilecomp_v2_t* restrict tilec, OPJ_UINT32 nu for(j = rw; j > 3; j -= 4){ OPJ_UINT32 k; - v4dwt_interleave_v(&v, aj, w, 4); - v4dwt_decode(&v); + opj_v4dwt_interleave_v(&v, aj, w, 4); + opj_v4dwt_decode(&v); for(k = 0; k < rh; ++k){ memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); @@ -1023,8 +1042,8 @@ opj_bool dwt_decode_real_v2(opj_tcd_tilecomp_v2_t* restrict tilec, OPJ_UINT32 nu j = rw & 0x03; - v4dwt_interleave_v(&v, aj, w, j); - v4dwt_decode(&v); + opj_v4dwt_interleave_v(&v, aj, w, j); + opj_v4dwt_decode(&v); for(k = 0; k < rh; ++k){ memcpy(&aj[k*w], &v.wavelet[k], j * sizeof(OPJ_FLOAT32));