diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index a4ff01ba..4fce8b20 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -129,16 +129,16 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT static OPJ_BOOL opj_dwt_encode_procedure( opj_tcd_tilecomp_t * tilec, void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) ); -static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i); +static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, OPJ_UINT32 i); /* */ /* Inverse 9-7 wavelet transform in 1-D. */ /* */ -static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); +static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt); -static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); +static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT w, OPJ_FLOAT32* OPJ_RESTRICT a, OPJ_INT32 x, OPJ_INT32 size); -static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); +static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT v , OPJ_FLOAT32* OPJ_RESTRICT a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); #ifdef __SSE__ static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); @@ -543,7 +543,7 @@ void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) { /* */ /* Determine maximum computed resolution level for inverse wavelet transform */ /* */ -static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i) { +static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, OPJ_UINT32 i) { OPJ_UINT32 mr = 0; OPJ_UINT32 w; while( --i ) { @@ -582,7 +582,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres v.mem = h.mem; while( --numres) { - OPJ_INT32 * restrict tiledp = tilec->data; + OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data; OPJ_UINT32 j; ++tr; @@ -617,8 +617,8 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres return OPJ_TRUE; } -static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){ - OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas); +static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT w, OPJ_FLOAT32* OPJ_RESTRICT a, OPJ_INT32 x, OPJ_INT32 size){ + OPJ_FLOAT32* OPJ_RESTRICT bi = (OPJ_FLOAT32*) (w->wavelet + w->cas); OPJ_INT32 count = w->sn; OPJ_INT32 i, k; @@ -660,8 +660,8 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restric } } -static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){ - opj_v4_t* restrict bi = v->wavelet + v->cas; +static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT v , OPJ_FLOAT32* OPJ_RESTRICT a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){ + opj_v4_t* OPJ_RESTRICT bi = v->wavelet + v->cas; OPJ_INT32 i; for(i = 0; i < v->sn; ++i){ @@ -679,7 +679,7 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restri #ifdef __SSE__ static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ - __m128* restrict vw = (__m128*) w; + __m128* OPJ_RESTRICT vw = (__m128*) w; OPJ_INT32 i; /* 4x unrolled loop */ for(i = 0; i < count >> 2; ++i){ @@ -700,8 +700,8 @@ static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m12 } void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c){ - __m128* restrict vl = (__m128*) l; - __m128* restrict vw = (__m128*) w; + __m128* OPJ_RESTRICT vl = (__m128*) l; + __m128* OPJ_RESTRICT vw = (__m128*) w; OPJ_INT32 i; __m128 tmp1, tmp2, tmp3; tmp1 = vl[0]; @@ -729,7 +729,7 @@ void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c) { - OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; + OPJ_FLOAT32* OPJ_RESTRICT fw = (OPJ_FLOAT32*) w; OPJ_INT32 i; for(i = 0; i < count; ++i){ OPJ_FLOAT32 tmp1 = fw[i*8 ]; @@ -797,7 +797,7 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_IN /* */ /* Inverse 9-7 wavelet transform in 1-D. */ /* */ -static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt) +static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt) { OPJ_INT32 a, b; if(dwt->cas == 0) { @@ -834,7 +834,7 @@ static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt) /* */ /* Inverse 9-7 wavelet transform in 2-D. */ /* */ -OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numres) +OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres) { opj_v4dwt_t h; opj_v4dwt_t v; @@ -854,7 +854,7 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numr v.wavelet = h.wavelet; while( --numres) { - OPJ_FLOAT32 * restrict aj = (OPJ_FLOAT32*) tilec->data; + OPJ_FLOAT32 * OPJ_RESTRICT aj = (OPJ_FLOAT32*) tilec->data; OPJ_UINT32 bufsize = (OPJ_UINT32)((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0)); OPJ_INT32 j; diff --git a/src/lib/openjp2/dwt.h b/src/lib/openjp2/dwt.h index 21fe942a..5ff37511 100644 --- a/src/lib/openjp2/dwt.h +++ b/src/lib/openjp2/dwt.h @@ -93,7 +93,7 @@ Apply an irreversible inverse DWT transform to a component of an image. @param tilec Tile component information (current tile) @param numres Number of resolution levels to decode */ -OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numres); +OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 numres); /** Get the gain of a subband for the irreversible 9-7 DWT. diff --git a/src/lib/openjp2/mct.c b/src/lib/openjp2/mct.c index 02259679..e1f2f50d 100644 --- a/src/lib/openjp2/mct.c +++ b/src/lib/openjp2/mct.c @@ -74,9 +74,9 @@ const OPJ_FLOAT64 * opj_mct_get_mct_norms_real () /* */ #ifdef __SSE2__ void opj_mct_encode( - OPJ_INT32* restrict c0, - OPJ_INT32* restrict c1, - OPJ_INT32* restrict c2, + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, + OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n) { OPJ_SIZE_T i; @@ -116,9 +116,9 @@ void opj_mct_encode( } #else void opj_mct_encode( - OPJ_INT32* restrict c0, - OPJ_INT32* restrict c1, - OPJ_INT32* restrict c2, + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, + OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n) { OPJ_SIZE_T i; @@ -143,9 +143,9 @@ void opj_mct_encode( /* */ #ifdef __SSE2__ void opj_mct_decode( - OPJ_INT32* restrict c0, - OPJ_INT32* restrict c1, - OPJ_INT32* restrict c2, + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, + OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n) { OPJ_SIZE_T i; @@ -178,9 +178,9 @@ void opj_mct_decode( } #else void opj_mct_decode( - OPJ_INT32* restrict c0, - OPJ_INT32* restrict c1, - OPJ_INT32* restrict c2, + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, + OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n) { OPJ_UINT32 i; @@ -210,9 +210,9 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) { /* */ #ifdef __SSE4_1__ void opj_mct_encode_real( - OPJ_INT32* restrict c0, - OPJ_INT32* restrict c1, - OPJ_INT32* restrict c2, + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, + OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n) { OPJ_SIZE_T i; @@ -351,9 +351,9 @@ void opj_mct_encode_real( } #else void opj_mct_encode_real( - OPJ_INT32* restrict c0, - OPJ_INT32* restrict c1, - OPJ_INT32* restrict c2, + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, + OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n) { OPJ_UINT32 i; @@ -375,9 +375,9 @@ void opj_mct_encode_real( /* Inverse irreversible MCT. */ /* */ void opj_mct_decode_real( - OPJ_FLOAT32* restrict c0, - OPJ_FLOAT32* restrict c1, - OPJ_FLOAT32* restrict c2, + OPJ_FLOAT32* OPJ_RESTRICT c0, + OPJ_FLOAT32* OPJ_RESTRICT c1, + OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_UINT32 n) { OPJ_UINT32 i; diff --git a/src/lib/openjp2/mct.h b/src/lib/openjp2/mct.h index 1c1f4d0c..1bc722e3 100644 --- a/src/lib/openjp2/mct.h +++ b/src/lib/openjp2/mct.h @@ -60,7 +60,7 @@ Apply a reversible multi-component transform to an image @param c2 Samples blue component @param n Number of samples for each component */ -void opj_mct_encode(OPJ_INT32 *c0, OPJ_INT32 *c1, OPJ_INT32 *c2, OPJ_UINT32 n); +void opj_mct_encode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); /** Apply a reversible multi-component inverse transform to an image @param c0 Samples for luminance component @@ -68,7 +68,7 @@ Apply a reversible multi-component inverse transform to an image @param c2 Samples for blue chrominance component @param n Number of samples for each component */ -void opj_mct_decode(OPJ_INT32 *c0, OPJ_INT32 *c1, OPJ_INT32 *c2, OPJ_UINT32 n); +void opj_mct_decode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); /** Get norm of the basis function used for the reversible multi-component transform @param compno Number of the component (0->Y, 1->U, 2->V) @@ -83,7 +83,7 @@ Apply an irreversible multi-component transform to an image @param c2 Samples blue component @param n Number of samples for each component */ -void opj_mct_encode_real(OPJ_INT32 *c0, OPJ_INT32 *c1, OPJ_INT32 *c2, OPJ_UINT32 n); +void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); /** Apply an irreversible multi-component inverse transform to an image @param c0 Samples for luminance component @@ -91,7 +91,7 @@ Apply an irreversible multi-component inverse transform to an image @param c2 Samples for blue chrominance component @param n Number of samples for each component */ -void opj_mct_decode_real(OPJ_FLOAT32* c0, OPJ_FLOAT32* c1, OPJ_FLOAT32* c2, OPJ_UINT32 n); +void opj_mct_decode_real(OPJ_FLOAT32* OPJ_RESTRICT c0, OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_UINT32 n); /** Get norm of the basis function used for the irreversible multi-component transform @param compno Number of the component (0->Y, 1->U, 2->V) diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index 58a5a9a9..60b7316d 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -103,12 +103,21 @@ */ /* Are restricted pointers available? (C99) */ -#if (__STDC_VERSION__ != 199901L) +#if (__STDC_VERSION__ >= 199901L) + #define OPJ_RESTRICT restrict +#else /* Not a C99 compiler */ - #ifdef __GNUC__ - #define restrict __restrict__ + #if defined(__GNUC__) + #define OPJ_RESTRICT __restrict__ + +/* + vc14 (2015) outputs wrong results. + Need to check OPJ_RESTRICT usage (or a bug in vc14) + #elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define OPJ_RESTRICT __restrict +*/ #else - #define restrict /* restrict */ + #define OPJ_RESTRICT /* restrict */ #endif #endif diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 1bf7205e..cb5a1cef 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1283,14 +1283,14 @@ OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1, opj_tcd_resolution_t* res = &tilec->resolutions[resno]; for (bandno = 0; bandno < res->numbands; ++bandno) { - opj_tcd_band_t* restrict band = &res->bands[bandno]; + opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno]; for (precno = 0; precno < res->pw * res->ph; ++precno) { opj_tcd_precinct_t* precinct = &band->precincts[precno]; for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) { opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; - OPJ_INT32* restrict datap; + OPJ_INT32* OPJ_RESTRICT datap; OPJ_UINT32 cblk_w, cblk_h; OPJ_INT32 x, y; OPJ_UINT32 i, j; @@ -1333,7 +1333,7 @@ OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1, } } if (tccp->qmfbid == 1) { - OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; + OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; for (j = 0; j < cblk_h; ++j) { for (i = 0; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; @@ -1341,9 +1341,9 @@ OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1, } } } else { /* if (tccp->qmfbid == 0) */ - OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; + OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; for (j = 0; j < cblk_h; ++j) { - OPJ_FLOAT32* restrict tiledp2 = tiledp; + OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize; *tiledp2 = tmp; @@ -1475,7 +1475,7 @@ OPJ_BOOL opj_t1_encode_cblks( opj_t1_t *t1, opj_tcd_resolution_t *res = &tilec->resolutions[resno]; for (bandno = 0; bandno < res->numbands; ++bandno) { - opj_tcd_band_t* restrict band = &res->bands[bandno]; + opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno]; OPJ_INT32 bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192)); for (precno = 0; precno < res->pw * res->ph; ++precno) { @@ -1483,7 +1483,7 @@ OPJ_BOOL opj_t1_encode_cblks( opj_t1_t *t1, for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno]; - OPJ_INT32* restrict tiledp; + OPJ_INT32* OPJ_RESTRICT tiledp; OPJ_UINT32 cblk_w; OPJ_UINT32 cblk_h; OPJ_UINT32 i, j, tileIndex=0, tileLineAdvance;