From cd12414c6b11295ae8540df2ef77eb9c624cd264 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 20 May 2017 14:05:07 +0200 Subject: [PATCH 01/19] T1: use more compact flags to optimize cache usage in encoder passes. (#172) Ported from Carl Hetherington work (actually through Matthieu Darbois's port on top of OpenJPEG 2.1.0) Can reduce total encoding time by 10-15% WARNING: VSC mode is not implemented, and so is a temporary regression that must be fixed. --- src/lib/openjp2/t1.c | 636 ++++++++++++++++++++--------- src/lib/openjp2/t1.h | 101 +++++ src/lib/openjp2/t1_generate_luts.c | 236 ++++++++++- src/lib/openjp2/t1_luts.h | 97 +++++ 4 files changed, 863 insertions(+), 207 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index af520202..982dbaa6 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -12,6 +12,7 @@ * Copyright (c) 2003-2014, Antonin Descampe * Copyright (c) 2005, Herve Drolon, FreeImage Team * Copyright (c) 2007, Callum Lerwick + * Copyright (c) 2012, Carl Hetherington * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -44,41 +45,43 @@ /** @defgroup T1 T1 - Implementation of the tier-1 coding */ /*@{*/ +#define ENC_FLAGS(x, y) (t1->enc_flags[x + 1 + ((y / 4) + 1) * t1->flags_stride]) + /** @name Local static functions */ /*@{*/ +static INLINE OPJ_BYTE opj_t1_enc_getctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient); static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f); +static OPJ_BYTE opj_t1_enc_getctxno_sc(OPJ_UINT32 fX, OPJ_UINT32 pfX, + OPJ_UINT32 nfX, OPJ_UINT32 ci); static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f); +static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f); static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f); +static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, + OPJ_UINT32 ci); static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); +static void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, + OPJ_UINT32 s, OPJ_UINT32 stride); static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride); /** Encode significant pass */ static void opj_t1_enc_sigpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, + opj_flag_enc_t *flagsp, OPJ_INT32 *datap, + OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, OPJ_BYTE type, - OPJ_UINT32 vsc); + OPJ_UINT32 k); /** Decode significant pass */ -#if 0 -static void opj_t1_dec_sigpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_UINT32 orient, - OPJ_INT32 oneplushalf, - OPJ_BYTE type, - OPJ_UINT32 vsc); -#endif static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, @@ -111,6 +114,7 @@ Encode significant pass */ static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 bpno, + OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 cblksty); @@ -132,13 +136,12 @@ static void 
opj_t1_dec_sigpass_mqc_vsc( Encode refinement pass */ static void opj_t1_enc_refpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, + opj_flag_enc_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 vsc); + OPJ_BYTE type); /** @@ -164,15 +167,6 @@ static void opj_t1_dec_refpass_mqc_vsc( /** Decode refinement pass */ -#if 0 -static void opj_t1_dec_refpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 poshalf, - OPJ_INT32 neghalf, - OPJ_BYTE type, - OPJ_UINT32 vsc); -#endif static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, @@ -206,13 +200,16 @@ Encode clean-up pass */ static void opj_t1_enc_clnpass_step( opj_t1_t *t1, - opj_flag_t *flagsp, + opj_flag_enc_t *flagsp, OPJ_INT32 *datap, + OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, - OPJ_UINT32 partial, - OPJ_UINT32 vsc); + OPJ_UINT32 agg, + OPJ_UINT32 runlen, + OPJ_UINT32 x, + OPJ_UINT32 y); /** Decode clean-up pass */ @@ -245,6 +242,7 @@ Encode clean-up pass static void opj_t1_enc_clnpass( opj_t1_t *t1, OPJ_INT32 bpno, + OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_UINT32 cblksty); @@ -297,16 +295,58 @@ static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1, /* ----------------------------------------------------------------------- */ +static INLINE OPJ_BYTE opj_t1_enc_getctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient) +{ + return lut_enc_ctxno_zc[(orient << 9) | (f & T1_SIGMA_NEIGHBOURS)]; +} + static OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f) { return mqc->lut_ctxno_zc_orient[(f & T1_SIG_OTH)]; } + +static OPJ_BYTE opj_t1_enc_getctxno_sc(OPJ_UINT32 fX, OPJ_UINT32 pfX, + OPJ_UINT32 nfX, OPJ_UINT32 ci) +{ + /* + 0 pfX T1_CHI_THIS T1_LUT_CTXNO_SGN_W + 1 tfX T1_SIGMA_1 T1_LUT_CTXNO_SIG_N + 2 nfX T1_CHI_THIS T1_LUT_CTXNO_SGN_E + 3 tfX T1_SIGMA_3 T1_LUT_CTXNO_SIG_W + 4 fX T1_CHI_(THIS - 1) T1_LUT_CTXNO_SGN_N + 5 tfX T1_SIGMA_5 T1_LUT_CTXNO_SIG_E + 6 fX T1_CHI_(THIS + 1) T1_LUT_CTXNO_SGN_S + 7 tfX 
T1_SIGMA_7 T1_LUT_CTXNO_SIG_S + */ + + OPJ_UINT32 lu = (fX >> (ci * 3)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 | + T1_SIGMA_7); + + lu |= (pfX >> (T1_CHI_THIS_I + (ci * 3U))) & (1U << 0); + lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2); + if (ci == 0U) { + lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4); + } else { + lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4); + } + lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6); + + return lut_enc_ctxno_sc[lu]; +} + static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f) { return lut_ctxno_sc[(f & (T1_SIG_PRIM | T1_SGN)) >> 4]; } +static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f) +{ + OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; + OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp; + return tmp2; +} + static OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f) { OPJ_UINT32 tmp1 = (f & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; @@ -314,6 +354,35 @@ static OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f) return (tmp2); } +static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, + OPJ_UINT32 ci) +{ + /* + 0 pfX T1_CHI_THIS T1_LUT_SGN_W + 1 tfX T1_SIGMA_1 T1_LUT_SIG_N + 2 nfX T1_CHI_THIS T1_LUT_SGN_E + 3 tfX T1_SIGMA_3 T1_LUT_SIG_W + 4 fX T1_CHI_(THIS - 1) T1_LUT_SGN_N + 5 tfX T1_SIGMA_5 T1_LUT_SIG_E + 6 fX T1_CHI_(THIS + 1) T1_LUT_SGN_S + 7 tfX T1_SIGMA_7 T1_LUT_SIG_S + */ + + int lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 | + T1_SIGMA_7); + + lu |= (pfX >> (T1_CHI_THIS_I + (ci * 3U))) & (1U << 0); + lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2); + if (ci == 0U) { + lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4); + } else { + lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4); + } + lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6); + + return lut_enc_spb[lu]; +} + static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f) { return lut_spb[(f & (T1_SIG_PRIM | T1_SGN)) >> 4]; @@ -337,6 +406,62 @@ 
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; } + +static void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, + OPJ_UINT32 s, OPJ_UINT32 stride) +{ + /* set up to point to the north and south data points' flags words, if required */ + opj_flag_enc_t* north; + opj_flag_enc_t* south; + + /* mark target as significant */ + *flagsp |= T1_SIGMA_4 << (3U * ci); + + /* north-west, north, north-east */ + if (ci == 0U) { + north = flagsp - stride; + *north |= T1_SIGMA_16; + north[-1] |= T1_SIGMA_17; + north[1] |= T1_SIGMA_15; + } + + /* south-west, south, south-east */ + if (ci == 3U) { + south = flagsp + stride; + *south |= T1_SIGMA_1; + south[-1] |= T1_SIGMA_2; + south[1] |= T1_SIGMA_0; + } + + /* east */ + flagsp[-1] |= T1_SIGMA_5 << (3U * ci); + + /* west */ + flagsp[1] |= T1_SIGMA_3 << (3U * ci); + + if (s) { + switch (ci) { + case 0U: { + *flagsp |= T1_CHI_1; + *north |= T1_CHI_5; + break; + } + case 1: + *flagsp |= T1_CHI_2; + break; + case 2: + *flagsp |= T1_CHI_3; + break; + case 3: { + *flagsp |= T1_CHI_4; + *south |= T1_CHI_0; + break; + } + + } + } +} + static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride) { @@ -405,48 +530,64 @@ static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, } } -static void opj_t1_enc_sigpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 vsc - ) + +void opj_t1_enc_sigpass_step(opj_t1_t *t1, + opj_flag_enc_t *flagsp, + OPJ_INT32 *datap, + OPJ_UINT32 orient, + OPJ_INT32 bpno, + OPJ_INT32 one, + OPJ_INT32 *nmsedec, + OPJ_BYTE type, + OPJ_UINT32 k + ) { OPJ_INT32 v; - OPJ_UINT32 flag; + OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = vsc ? 
(OPJ_UINT32)((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | - T1_SGN_S))) : (OPJ_UINT32)(*flagsp); - if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { - v = (opj_int_abs(*datap) & one) ? 1 : 0; - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); /* ESSAI */ - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, (OPJ_UINT32)v); - } else { - opj_mqc_encode(mqc, (OPJ_UINT32)v); - } - if (v) { - v = *datap < 0 ? 1 : 0; - *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)(bpno)); - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag)); /* ESSAI */ + if (*flagsp == 0U) { + /* Nothing to do for any of the 4 data points */ + return; + } + + for (ci = 0U; ci < 4U; ++ci) { + /* XXX:TODO enc_flags_t and vsc mode a la + vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; + flag = vsc ? ((*dec_flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*dec_flagsp); + */ + + OPJ_UINT32 const shift_flags = *flagsp >> (ci * 3U); + + if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == 0U && + (shift_flags & T1_SIGMA_NEIGHBOURS) != 0U) { + v = opj_int_abs(*datap) & one ? 1 : 0; + opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_zc(shift_flags, + orient)); /* ESSAI */ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, (OPJ_UINT32)v); + opj_mqc_bypass_enc(mqc, v); } else { - opj_mqc_encode(mqc, (OPJ_UINT32)(v ^ opj_t1_getspb((OPJ_UINT32)flag))); + opj_mqc_encode(mqc, v); } - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + if (v) { + v = *datap < 0 ? 
1 : 0; + *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); + opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_sc(*flagsp, flagsp[-1], flagsp[1], + ci)); /* ESSAI */ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ + opj_mqc_bypass_enc(mqc, v); + } else { + opj_mqc_encode(mqc, v ^ opj_t1_enc_getspb(*flagsp, flagsp[-1], flagsp[1], ci)); + } + opj_t1_enc_updateflags(flagsp, ci, v, t1->flags_stride); + } + *flagsp |= T1_PI_0 << (ci * 3U); } - *flagsp |= T1_VISIT; + datap += t1->data_stride; } } - static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, @@ -548,35 +689,37 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( } /* VSC and BYPASS by Antonin */ - static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 bpno, + OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 cblksty ) { - OPJ_UINT32 i, j, k, vsc; - OPJ_INT32 one; + OPJ_UINT32 i, k; + OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); + opj_flag_enc_t* f = &ENC_FLAGS(0, 0); + OPJ_UINT32 const extra = t1->flags_stride - t1->w; + OPJ_INT32* d = t1->data; *nmsedec = 0; - one = 1 << (bpno + T1_NMSEDEC_FRACBITS); for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { - for (j = k; j < k + 4 && j < t1->h; ++j) { - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || - j == t1->h - 1)) ? 
1 : 0; - opj_t1_enc_sigpass_step( - t1, - &t1->flags[((j + 1) * t1->flags_stride) + i + 1], - &t1->data[(j * t1->data_stride) + i], - bpno, - one, - nmsedec, - type, - vsc); - } + opj_t1_enc_sigpass_step( + t1, + f, + &t1->data[(k * t1->data_stride) + i], + orient, + bpno, + one, + nmsedec, + type, k); + + ++f; + ++d; } + f += extra; } } @@ -725,38 +868,53 @@ static void opj_t1_dec_sigpass_mqc_vsc( } } /* VSC and BYPASS by Antonin */ - - static void opj_t1_enc_refpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, + opj_flag_enc_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 vsc) + OPJ_BYTE type) { OPJ_INT32 v; - OPJ_UINT32 flag; + OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = vsc ? (OPJ_UINT32)((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | - T1_SGN_S))) : (OPJ_UINT32)(*flagsp); - if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { - *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)(bpno)); - v = (opj_int_abs(*datap) & one) ? 1 : 0; - opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag(flag)); /* ESSAI */ - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, (OPJ_UINT32)v); - } else { - opj_mqc_encode(mqc, (OPJ_UINT32)v); + if ((*flagsp & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { + /* none significant */ + return; + } + if ((*flagsp & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == + (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) { + /* all processed by sigpass */ + return; + } + + for (ci = 0U; ci < 4U; ++ci) { + + /* XXX:TODO vsc mode as per + vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; + flag = vsc ? 
((*dec_flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*dec_flagsp); + */ + + OPJ_UINT32 shift_flags = *flagsp >> (ci * 3U); + if ((shift_flags & (T1_SIGMA_4 | T1_PI_0)) == T1_SIGMA_4) { + *nmsedec += opj_t1_getnmsedec_ref(opj_int_abs(*datap), bpno); + v = opj_int_abs(*datap) & one ? 1 : 0; + opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_mag(shift_flags)); /* ESSAI */ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ + opj_mqc_bypass_enc(mqc, v); + } else { + opj_mqc_encode(mqc, v); + } + *flagsp |= T1_MU_0 << (ci * 3U); } - *flagsp |= T1_REFINE; + datap += t1->data_stride; } } + static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, opj_colflag_t *colflagsp, @@ -852,7 +1010,6 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( } } /* VSC and BYPASS by Antonin */ - static void opj_t1_enc_refpass( opj_t1_t *t1, OPJ_INT32 bpno, @@ -860,30 +1017,29 @@ static void opj_t1_enc_refpass( OPJ_BYTE type, OPJ_UINT32 cblksty) { - OPJ_UINT32 i, j, k, vsc; - OPJ_INT32 one; + OPJ_UINT32 i, k; + const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); + opj_flag_enc_t* f = &ENC_FLAGS(0, 0); + const OPJ_UINT32 extra = t1->flags_stride - t1->w; *nmsedec = 0; - one = 1 << (bpno + T1_NMSEDEC_FRACBITS); - for (k = 0; k < t1->h; k += 4) { - for (i = 0; i < t1->w; ++i) { - for (j = k; j < k + 4 && j < t1->h; ++j) { - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || - j == t1->h - 1)) ? 
1 : 0; - opj_t1_enc_refpass_step( - t1, - &t1->flags[((j + 1) * t1->flags_stride) + i + 1], - &t1->data[(j * t1->data_stride) + i], - bpno, - one, - nmsedec, - type, - vsc); - } + for (k = 0U; k < t1->h; k += 4U) { + for (i = 0U; i < t1->w; ++i) { + opj_t1_enc_refpass_step( + t1, + f, + &t1->data[(k * t1->data_stride) + i], + bpno, + one, + nmsedec, + type); + ++f; } + f += extra; } } + static void opj_t1_dec_refpass_raw( opj_t1_t *t1, OPJ_INT32 bpno) @@ -1025,44 +1181,75 @@ static void opj_t1_dec_refpass_mqc_vsc( } } /* VSC and BYPASS by Antonin */ - static void opj_t1_enc_clnpass_step( opj_t1_t *t1, - opj_flag_t *flagsp, + opj_flag_enc_t *flagsp, OPJ_INT32 *datap, + OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, - OPJ_UINT32 partial, - OPJ_UINT32 vsc) + OPJ_UINT32 agg, + OPJ_UINT32 runlen, + OPJ_UINT32 x, + OPJ_UINT32 y) { OPJ_INT32 v; - OPJ_UINT32 flag; - + OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = vsc ? (OPJ_UINT32)((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | - T1_SGN_S))) : (OPJ_UINT32)(*flagsp); - if (partial) { - goto LABEL_PARTIAL; - } - if (!(*flagsp & (T1_SIG | T1_VISIT))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); - v = (opj_int_abs(*datap) & one) ? 1 : 0; - opj_mqc_encode(mqc, (OPJ_UINT32)v); - if (v) { -LABEL_PARTIAL: - *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), - (OPJ_UINT32)(bpno)); - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag)); - v = *datap < 0 ? 
1 : 0; - opj_mqc_encode(mqc, (OPJ_UINT32)(v ^ opj_t1_getspb((OPJ_UINT32)flag))); - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + OPJ_UINT32 lim; + const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | + T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + + if ((*flagsp & check) == check) { + if (runlen == 0) { + *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + } else if (runlen == 1) { + *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); + } else if (runlen == 2) { + *flagsp &= ~(T1_PI_2 | T1_PI_3); + } else if (runlen == 3) { + *flagsp &= ~(T1_PI_3); } + return; + } + + lim = 4U < (t1->h - y) ? 4U : (t1->h - y); + for (ci = runlen; ci < lim; ++ci) { + opj_flag_enc_t shift_flags; + + /* XXX:TODO vsc mode a la + vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; + flag = vsc ? ((*dec_flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*dec_flagsp); + */ + + if ((agg != 0) && (ci == runlen)) { + goto LABEL_PARTIAL; + } + + shift_flags = *flagsp >> (ci * 3U); + + if (!(shift_flags & (T1_SIGMA_4 | T1_PI_0))) { + opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_zc(shift_flags, orient)); + v = opj_int_abs(*datap) & one ? 1 : 0; + opj_mqc_encode(mqc, v); + if (v) { +LABEL_PARTIAL: + *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); + opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_sc(*flagsp, flagsp[-1], flagsp[1], + ci)); + v = *datap < 0 ? 
1 : 0; + opj_mqc_encode(mqc, v ^ opj_t1_enc_getspb(*flagsp, flagsp[-1], flagsp[1], ci)); + opj_t1_enc_updateflags(flagsp, ci, v, t1->flags_stride); + } + } + *flagsp &= ~(T1_PI_0 << (3U * ci)); + datap += t1->data_stride; } - *flagsp &= (opj_flag_t)~T1_VISIT; } + static void opj_t1_dec_clnpass_step_partial( opj_t1_t *t1, opj_flag_t *flagsp, @@ -1180,38 +1367,35 @@ LABEL_PARTIAL: #endif } +#define MACRO_t1_enc_flags(x,y) t1->enc_flags[((x)*(t1->flags_stride))+(y)] + static void opj_t1_enc_clnpass( opj_t1_t *t1, OPJ_INT32 bpno, + OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_UINT32 cblksty) { - OPJ_UINT32 i, j, k; - OPJ_INT32 one; - OPJ_UINT32 agg, runlen, vsc; + OPJ_UINT32 i, k; + const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); + OPJ_UINT32 agg, runlen; opj_mqc_t *mqc = t1->mqc; /* MQC component */ *nmsedec = 0; - one = 1 << (bpno + T1_NMSEDEC_FRACBITS); + for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { - if (k + 3 < t1->h) { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - agg = !(MACRO_t1_flags(1 + k, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || MACRO_t1_flags(1 + k + 1, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || MACRO_t1_flags(1 + k + 2, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || (MACRO_t1_flags(1 + k + 3, 1 + i) - & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | - T1_SIG_OTH)); - } else { - agg = !((MACRO_t1_flags(1 + k, 1 + i) | - MACRO_t1_flags(1 + k + 1, 1 + i) | - MACRO_t1_flags(1 + k + 2, 1 + i) | - MACRO_t1_flags(1 + k + 3, 1 + i)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); - } + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + /* XXX:TODO this needs implementing */ + agg = !(MACRO_t1_enc_flags(1 + k, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) + || MACRO_t1_enc_flags(1 + k + 1, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) + || MACRO_t1_enc_flags(1 + k + 2, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) + || (MACRO_t1_enc_flags(1 + k + 3, 1 + i) + & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | + 
T1_SIG_OTH)); } else { - agg = 0; + agg = !ENC_FLAGS(i, k); } if (agg) { for (runlen = 0; runlen < 4; ++runlen) { @@ -1230,23 +1414,23 @@ static void opj_t1_enc_clnpass( } else { runlen = 0; } - for (j = k + runlen; j < k + 4 && j < t1->h; ++j) { - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || - j == t1->h - 1)) ? 1 : 0; - opj_t1_enc_clnpass_step( - t1, - &t1->flags[((j + 1) * t1->flags_stride) + i + 1], - &t1->data[(j * t1->data_stride) + i], - bpno, - one, - nmsedec, - agg && (j == k + runlen), - vsc); - } + opj_t1_enc_clnpass_step( + t1, + &ENC_FLAGS(i, k), + &t1->data[((k + runlen) * t1->data_stride) + i], + orient, + bpno, + one, + nmsedec, + agg, + runlen, + i, + k); } } } + #define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)] #define opj_t1_dec_clnpass_internal(consistency_check, t1, bpno, cblksty, w, h, flags_stride) \ @@ -1468,6 +1652,8 @@ static OPJ_BOOL opj_t1_allocate_buffers( OPJ_UINT32 w, OPJ_UINT32 h) { + size_t flagssize; + /* encoder uses tile buffer, so no need to allocate */ if (!t1->encoder) { size_t datasize; @@ -1510,31 +1696,34 @@ static OPJ_BOOL opj_t1_allocate_buffers( } } - { - size_t flagssize; - - /* Overflow check */ - if (w > (0xFFFFFFFFU /* UINT32_MAX */ - 2U)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } - t1->flags_stride = w + 2U; /* can't be 0U */ + /* Overflow check */ + if (w > (0xFFFFFFFFU /* UINT32_MAX */ - 2U)) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + t1->flags_stride = w + 2U; /* can't be 0U */ #if (SIZE_MAX - 3U) < 0xFFFFFFFFU /* UINT32_MAX */ - /* Overflow check */ - if (h > (0xFFFFFFFFU /* UINT32_MAX */ - 3U)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } + /* Overflow check */ + if (h > (0xFFFFFFFFU /* UINT32_MAX */ - 3U)) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } #endif - flagssize = (size_t)h + 3U; + if (!t1->encoder) { + flagssize = (size_t)h + 2U; + } else { + flagssize = 
(h + 3U) / 4U + 2U; + } - /* Overflow check */ - if (flagssize > (SIZE_MAX / (size_t)t1->flags_stride)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } - flagssize *= (size_t)t1->flags_stride; + /* Overflow check */ + if (flagssize > (SIZE_MAX / (size_t)t1->flags_stride)) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + flagssize *= (size_t)t1->flags_stride; + + if (!t1->encoder) { if (flagssize > (size_t)t1->flagssize) { /* Overflow check */ @@ -1556,11 +1745,11 @@ static OPJ_BOOL opj_t1_allocate_buffers( return OPJ_FALSE; } #endif - t1->flagssize = (OPJ_UINT32)flagssize; } + t1->flagssize = (OPJ_UINT32)flagssize; + memset(t1->flags, 0, flagssize * sizeof(opj_flag_t)); - } - if (!t1->encoder) { + size_t colflags_size = ((((size_t)h + 3U) / 4U) + 2U); /* Can't overflow, h checked against UINT32_MAX - 3U */ @@ -1595,6 +1784,64 @@ static OPJ_BOOL opj_t1_allocate_buffers( t1->colflags_size = (OPJ_UINT32)colflags_size; } memset(t1->colflags, 0, colflags_size * sizeof(opj_colflag_t)); + } else { + /* BIG FAT XXX */ + opj_flag_enc_t* p; + OPJ_UINT32 x; + OPJ_UINT32 flags_height = (h + 3U) / 4U; + + if (flagssize > (size_t)t1->flagssize) { + /* Overflow check */ + if (flagssize > (SIZE_MAX / sizeof(opj_flag_enc_t))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + opj_aligned_free(t1->enc_flags); + t1->enc_flags = (opj_flag_enc_t*) opj_aligned_malloc(flagssize * sizeof( + opj_flag_enc_t)); + if (!t1->enc_flags) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ + /* TODO remove this if t1->flagssize type changes to size_t */ + /* Overflow check */ + if (flagssize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#endif + } + t1->flagssize = (OPJ_UINT32)flagssize; + + memset(t1->enc_flags, 0, flagssize * sizeof(opj_flag_enc_t)); + + p = &t1->enc_flags[0]; + for (x = 0; x < 
t1->flags_stride; ++x) { + /* magic value to hopefully stop any passes being interested in this entry */ + *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + } + + p = &t1->enc_flags[((flags_height + 1) * t1->flags_stride)]; + for (x = 0; x < t1->flags_stride; ++x) { + /* magic value to hopefully stop any passes being interested in this entry */ + *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + } + + if (h % 4) { + OPJ_UINT32 v = 0; + p = &t1->enc_flags[((flags_height) * t1->flags_stride)]; + if (h % 4 == 1) { + v |= T1_PI_1 | T1_PI_2 | T1_PI_3; + } else if (h % 4 == 2) { + v |= T1_PI_2 | T1_PI_3; + } else if (h % 4 == 3) { + v |= T1_PI_3; + } + for (x = 0; x < t1->flags_stride; ++x) { + *p++ = v; + } + } } t1->w = w; @@ -1670,6 +1917,11 @@ void opj_t1_destroy(opj_t1_t *p_t1) opj_aligned_free(p_t1->colflags); p_t1->colflags = 00; } + + if (p_t1->enc_flags) { + opj_aligned_free(p_t1->enc_flags); + p_t1->enc_flags = 00; + } opj_free(p_t1); } @@ -2146,13 +2398,13 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, switch (passtype) { case 0: - opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty); + opj_t1_enc_sigpass(t1, bpno, orient, &nmsedec, type, cblksty); break; case 1: opj_t1_enc_refpass(t1, bpno, &nmsedec, type, cblksty); break; case 2: - opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty); + opj_t1_enc_clnpass(t1, bpno, orient, &nmsedec, cblksty); /* code switch SEGMARK (i.e. SEGSYM) */ if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) { opj_mqc_segmark_enc(mqc); diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 31a2d9f3..f5ce9108 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -11,6 +11,7 @@ * Copyright (c) 2003-2007, Francois-Olivier Devaux * Copyright (c) 2003-2014, Antonin Descampe * Copyright (c) 2005, Herve Drolon, FreeImage Team + * Copyright (c) 2012, Carl Hetherington * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -112,6 +113,101 @@ in T1.C are used by some function in TCD.C. 
#define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3U * T1_COLFLAG_RBS)) #define T1_COLFLAG_REFINE_ROW_3 (T1_COLFLAG_REFINE_ROW_0 << (3U * T1_COLFLAG_RBS)) + +/* BEGINNING of flags that apply to opj_flag_enc_t */ +/** We hold the state of individual data points for the T1 encoder using + * a single 32-bit flags word to hold the state of 4 data points. This corresponds + * to the 4-point-high columns that the data is processed in. + * + * These #defines declare the layout of a 32-bit flags word. + * + * This is currently done for encoding only. + */ + +#define T1_SIGMA_0 (1U << 0) +#define T1_SIGMA_1 (1U << 1) +#define T1_SIGMA_2 (1U << 2) +#define T1_SIGMA_3 (1U << 3) +#define T1_SIGMA_4 (1U << 4) +#define T1_SIGMA_5 (1U << 5) +#define T1_SIGMA_6 (1U << 6) +#define T1_SIGMA_7 (1U << 7) +#define T1_SIGMA_8 (1U << 8) +#define T1_SIGMA_9 (1U << 9) +#define T1_SIGMA_10 (1U << 10) +#define T1_SIGMA_11 (1U << 11) +#define T1_SIGMA_12 (1U << 12) +#define T1_SIGMA_13 (1U << 13) +#define T1_SIGMA_14 (1U << 14) +#define T1_SIGMA_15 (1U << 15) +#define T1_SIGMA_16 (1U << 16) +#define T1_SIGMA_17 (1U << 17) + +#define T1_CHI_0 (1U << 18) +#define T1_CHI_0_I 18 +#define T1_CHI_1 (1U << 19) +#define T1_CHI_1_I 19 +#define T1_MU_0 (1U << 20) +#define T1_PI_0 (1U << 21) +#define T1_CHI_2 (1U << 22) +#define T1_CHI_2_I 22 +#define T1_MU_1 (1U << 23) +#define T1_PI_1 (1U << 24) +#define T1_CHI_3 (1U << 25) +#define T1_MU_2 (1U << 26) +#define T1_PI_2 (1U << 27) +#define T1_CHI_4 (1U << 28) +#define T1_MU_3 (1U << 29) +#define T1_PI_3 (1U << 30) +#define T1_CHI_5 (1U << 31) + + +/** As an example, the bits T1_SIGMA_3, T1_SIGMA_4 and T1_SIGMA_5 + * indicate the significance state of the west neighbour of data point zero + * of our four, the point itself, and its east neighbour respectively. 
+ * Many of the bits are arranged so that given a flags word, you can + * look at the values for the data point 0, then shift the flags + * word right by 3 bits and look at the same bit positions to see the + * values for data point 1. + * + * The #defines below help a bit with this; say you have a flags word + * f, you can do things like + * + * (f & T1_SIGMA_THIS) + * + * to see the significance bit of data point 0, then do + * + * ((f >> 3) & T1_SIGMA_THIS) + * + * to see the significance bit of data point 1. + */ + +#define T1_SIGMA_NW T1_SIGMA_0 +#define T1_SIGMA_N T1_SIGMA_1 +#define T1_SIGMA_NE T1_SIGMA_2 +#define T1_SIGMA_W T1_SIGMA_3 +#define T1_SIGMA_THIS T1_SIGMA_4 +#define T1_SIGMA_E T1_SIGMA_5 +#define T1_SIGMA_SW T1_SIGMA_6 +#define T1_SIGMA_S T1_SIGMA_7 +#define T1_SIGMA_SE T1_SIGMA_8 +#define T1_SIGMA_NEIGHBOURS (T1_SIGMA_NW | T1_SIGMA_N | T1_SIGMA_NE | T1_SIGMA_W | T1_SIGMA_E | T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE) + +#define T1_CHI_THIS T1_CHI_1 +#define T1_CHI_THIS_I T1_CHI_1_I +#define T1_MU_THIS T1_MU_0 +#define T1_PI_THIS T1_PI_0 + +#define T1_LUT_SGN_W (1U << 0) +#define T1_LUT_SIG_N (1U << 1) +#define T1_LUT_SGN_E (1U << 2) +#define T1_LUT_SIG_W (1U << 3) +#define T1_LUT_SGN_N (1U << 4) +#define T1_LUT_SIG_E (1U << 5) +#define T1_LUT_SGN_S (1U << 6) +#define T1_LUT_SIG_S (1U << 7) +/* END of flags that apply to opj_flag_enc_t */ + /* ----------------------------------------------------------------------- */ typedef OPJ_UINT16 opj_flag_t; @@ -119,6 +215,8 @@ typedef OPJ_UINT16 opj_flag_t; /** Flags for 4 consecutive rows of a column */ typedef OPJ_UINT16 opj_colflag_t; +typedef OPJ_UINT32 opj_flag_enc_t; + /** Tier-1 coding (coding of code-block coefficients) */ @@ -130,12 +228,15 @@ typedef struct opj_t1 { opj_raw_t *raw; OPJ_INT32 *data; + /** Flags used by decoder */ opj_flag_t *flags; /** Addition flag array such that colflags[1+0] is for state of col=0,row=0..3, colflags[1+1] for col=1, row=0..3, colflags[1+flags_stride] for 
col=0,row=4..7, ... This array avoids too much cache trashing when processing by 4 vertical samples as done in the various decoding steps. */ opj_colflag_t* colflags; + /** Flags used by encoder */ + opj_flag_enc_t *enc_flags; OPJ_UINT32 w; OPJ_UINT32 h; OPJ_UINT32 datasize; diff --git a/src/lib/openjp2/t1_generate_luts.c b/src/lib/openjp2/t1_generate_luts.c index 2f7505d8..be0243c3 100644 --- a/src/lib/openjp2/t1_generate_luts.c +++ b/src/lib/openjp2/t1_generate_luts.c @@ -12,6 +12,7 @@ * Copyright (c) 2003-2014, Antonin Descampe * Copyright (c) 2005, Herve Drolon, FreeImage Team * Copyright (c) 2007, Callum Lerwick + * Copyright (c) 2012, Carl Hetherington * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -118,6 +119,83 @@ static int t1_init_ctxno_zc(unsigned int f, unsigned int orient) return (T1_CTXNO_ZC + n); } +static int t1_init_enc_ctxno_zc(int f, int orient) +{ + int h, v, d, n, t, hv; + n = 0; + h = ((f & T1_SIGMA_3) != 0) + ((f & T1_SIGMA_5) != 0); + v = ((f & T1_SIGMA_1) != 0) + ((f & T1_SIGMA_7) != 0); + d = ((f & T1_SIGMA_0) != 0) + ((f & T1_SIGMA_2) != 0) + (( + f & T1_SIGMA_8) != 0) + ((f & T1_SIGMA_6) != 0); + + switch (orient) { + case 2: + t = h; + h = v; + v = t; + case 0: + case 1: + if (!h) { + if (!v) { + if (!d) { + n = 0; + } else if (d == 1) { + n = 1; + } else { + n = 2; + } + } else if (v == 1) { + n = 3; + } else { + n = 4; + } + } else if (h == 1) { + if (!v) { + if (!d) { + n = 5; + } else { + n = 6; + } + } else { + n = 7; + } + } else { + n = 8; + } + break; + case 3: + hv = h + v; + if (!d) { + if (!hv) { + n = 0; + } else if (hv == 1) { + n = 1; + } else { + n = 2; + } + } else if (d == 1) { + if (!hv) { + n = 3; + } else if (hv == 1) { + n = 4; + } else { + n = 5; + } + } else if (d == 2) { + if (!hv) { + n = 6; + } else { + n = 7; + } + } else { + n = 8; + } + break; + } + + return (T1_CTXNO_ZC + n); +} + static int t1_init_ctxno_sc(unsigned int f) { int hc, vc, n; @@ -162,6 +240,50 
@@ static int t1_init_ctxno_sc(unsigned int f) return (T1_CTXNO_SC + n); } +static int t1_init_enc_ctxno_sc(int f) +{ + int hc, vc, n; + n = 0; + + hc = opj_int_min(((f & (T1_LUT_SIG_E | T1_LUT_SGN_E)) == + T1_LUT_SIG_E) + ((f & (T1_LUT_SIG_W | T1_LUT_SGN_W)) == T1_LUT_SIG_W), + 1) - opj_int_min(((f & (T1_LUT_SIG_E | T1_LUT_SGN_E)) == + (T1_LUT_SIG_E | T1_LUT_SGN_E)) + + ((f & (T1_LUT_SIG_W | T1_LUT_SGN_W)) == + (T1_LUT_SIG_W | T1_LUT_SGN_W)), 1); + + vc = opj_int_min(((f & (T1_LUT_SIG_N | T1_LUT_SGN_N)) == + T1_LUT_SIG_N) + ((f & (T1_LUT_SIG_S | T1_LUT_SGN_S)) == T1_LUT_SIG_S), + 1) - opj_int_min(((f & (T1_LUT_SIG_N | T1_LUT_SGN_N)) == + (T1_LUT_SIG_N | T1_LUT_SGN_N)) + + ((f & (T1_LUT_SIG_S | T1_LUT_SGN_S)) == + (T1_LUT_SIG_S | T1_LUT_SGN_S)), 1); + + if (hc < 0) { + hc = -hc; + vc = -vc; + } + if (!hc) { + if (vc == -1) { + n = 1; + } else if (!vc) { + n = 0; + } else { + n = 1; + } + } else if (hc == 1) { + if (vc == -1) { + n = 2; + } else if (!vc) { + n = 3; + } else { + n = 4; + } + } + + return (T1_CTXNO_SC + n); +} + static int t1_init_spb(unsigned int f) { int hc, vc, n; @@ -189,14 +311,43 @@ static int t1_init_spb(unsigned int f) return n; } +static int t1_init_enc_spb(int f) +{ + int hc, vc, n; + + hc = opj_int_min(((f & (T1_LUT_SIG_E | T1_LUT_SGN_E)) == + T1_LUT_SIG_E) + ((f & (T1_LUT_SIG_W | T1_LUT_SGN_W)) == T1_LUT_SIG_W), + 1) - opj_int_min(((f & (T1_LUT_SIG_E | T1_LUT_SGN_E)) == + (T1_LUT_SIG_E | T1_LUT_SGN_E)) + + ((f & (T1_LUT_SIG_W | T1_LUT_SGN_W)) == + (T1_LUT_SIG_W | T1_LUT_SGN_W)), 1); + + vc = opj_int_min(((f & (T1_LUT_SIG_N | T1_LUT_SGN_N)) == + T1_LUT_SIG_N) + ((f & (T1_LUT_SIG_S | T1_LUT_SGN_S)) == T1_LUT_SIG_S), + 1) - opj_int_min(((f & (T1_LUT_SIG_N | T1_LUT_SGN_N)) == + (T1_LUT_SIG_N | T1_LUT_SGN_N)) + + ((f & (T1_LUT_SIG_S | T1_LUT_SGN_S)) == + (T1_LUT_SIG_S | T1_LUT_SGN_S)), 1); + + if (!hc && !vc) { + n = 0; + } else { + n = (!(hc > 0 || (!hc && vc > 0))); + } + + return n; +} + static void dump_array16(int array[], int size) { int 
i; --size; for (i = 0; i < size; ++i) { - printf("0x%04x, ", array[i]); + printf("0x%04x,", array[i]); if (!((i + 1) & 0x7)) { - printf("\n "); + printf("\n "); + } else { + printf(" "); } } printf("0x%04x\n};\n\n", array[size]); @@ -208,6 +359,7 @@ int main(int argc, char **argv) double u, v, t; int lut_ctxno_zc[1024]; + int lut_enc_ctxno_zc[2048]; int lut_nmsedec_sig[1 << T1_NMSEDEC_BITS]; int lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS]; int lut_nmsedec_ref[1 << T1_NMSEDEC_BITS]; @@ -230,35 +382,89 @@ int main(int argc, char **argv) } } - printf("static const OPJ_BYTE lut_ctxno_zc[1024] = {\n "); + printf("static const OPJ_BYTE lut_ctxno_zc[1024] = {\n "); for (i = 0U; i < 1023U; ++i) { - printf("%i, ", lut_ctxno_zc[i]); + printf("%i,", lut_ctxno_zc[i]); if (!((i + 1U) & 0x1fU)) { - printf("\n "); + printf("\n "); + } else { + printf(" "); } } printf("%i\n};\n\n", lut_ctxno_zc[1023]); + /* lut_enc_ctxno_zc */ + for (j = 0; j < 4; ++j) { + for (i = 0; i < 512; ++i) { + int orient = j; + if (orient == 2) { + orient = 1; + } else if (orient == 1) { + orient = 2; + } + lut_enc_ctxno_zc[(orient << 9) | i] = t1_init_enc_ctxno_zc(i, j); + } + } + + printf("static const OPJ_BYTE lut_enc_ctxno_zc[2048] = {\n "); + for (i = 0; i < 2047; ++i) { + printf("%i,", lut_enc_ctxno_zc[i]); + if (!((i + 1) & 0x1f)) { + printf("\n "); + } else { + printf(" "); + } + } + printf("%i\n};\n\n", lut_enc_ctxno_zc[2047]); + /* lut_ctxno_sc */ - printf("static const OPJ_BYTE lut_ctxno_sc[256] = {\n "); + printf("static const OPJ_BYTE lut_ctxno_sc[256] = {\n "); for (i = 0U; i < 255U; ++i) { - printf("0x%x, ", t1_init_ctxno_sc(i << 4)); + printf("0x%x,", t1_init_ctxno_sc(i << 4)); if (!((i + 1U) & 0xfU)) { - printf("\n "); + printf("\n "); + } else { + printf(" "); } } printf("0x%x\n};\n\n", t1_init_ctxno_sc(255U << 4)); + /* lut_enc_ctxno_sc */ + printf("static const OPJ_BYTE lut_enc_ctxno_sc[256] = {\n "); + for (i = 0; i < 255; ++i) { + printf("0x%x,", t1_init_enc_ctxno_sc(i)); + if (!((i + 1) 
& 0xf)) { + printf("\n "); + } else { + printf(" "); + } + } + printf("0x%x\n};\n\n", t1_init_enc_ctxno_sc(255)); + /* lut_spb */ - printf("static const OPJ_BYTE lut_spb[256] = {\n "); + printf("static const OPJ_BYTE lut_spb[256] = {\n "); for (i = 0U; i < 255U; ++i) { - printf("%i, ", t1_init_spb(i << 4)); + printf("%i,", t1_init_spb(i << 4)); if (!((i + 1U) & 0x1fU)) { - printf("\n "); + printf("\n "); + } else { + printf(" "); } } printf("%i\n};\n\n", t1_init_spb(255U << 4)); + /* lut_enc_spb */ + printf("static const OPJ_BYTE lut_enc_spb[256] = {\n "); + for (i = 0; i < 255; ++i) { + printf("%i,", t1_init_enc_spb(i)); + if (!((i + 1) & 0x1f)) { + printf("\n "); + } else { + printf(" "); + } + } + printf("%i\n};\n\n", t1_init_enc_spb(255)); + /* FIXME FIXME FIXME */ /* fprintf(stdout,"nmsedec luts:\n"); */ for (i = 0U; i < (1U << T1_NMSEDEC_BITS); ++i) { @@ -289,16 +495,16 @@ int main(int argc, char **argv) T1_NMSEDEC_FRACBITS) * 8192.0)); } - printf("static const OPJ_INT16 lut_nmsedec_sig[1U << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_sig[1U << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_sig, 1U << T1_NMSEDEC_BITS); - printf("static const OPJ_INT16 lut_nmsedec_sig0[1U << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_sig0[1U << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_sig0, 1U << T1_NMSEDEC_BITS); - printf("static const OPJ_INT16 lut_nmsedec_ref[1U << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_ref[1U << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_ref, 1U << T1_NMSEDEC_BITS); - printf("static const OPJ_INT16 lut_nmsedec_ref0[1U << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_ref0[1U << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_ref0, 1U << T1_NMSEDEC_BITS); return 0; diff --git a/src/lib/openjp2/t1_luts.h b/src/lib/openjp2/t1_luts.h index 8402b7a8..fe560e2e 100644 --- a/src/lib/openjp2/t1_luts.h +++ 
b/src/lib/openjp2/t1_luts.h @@ -35,6 +35,73 @@ static const OPJ_BYTE lut_ctxno_zc[1024] = { 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8 }; +static const OPJ_BYTE lut_enc_ctxno_zc[2048] = { + 0, 1, 3, 3, 1, 2, 3, 3, 5, 6, 7, 7, 6, 6, 7, 7, 0, 1, 3, 3, 1, 2, 3, 3, 5, 6, 7, 7, 6, 6, 7, 7, + 5, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 5, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 2, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, 2, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 0, 1, 5, 6, 1, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, 0, 1, 5, 6, 1, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, + 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 7, 7, 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 
7, 7, + 1, 2, 6, 6, 2, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, 1, 2, 6, 6, 2, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, + 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 7, 7, 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 7, 7, + 5, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 5, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 6, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 6, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 1, 2, 6, 6, 2, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, 1, 2, 6, 6, 2, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, + 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 7, 7, 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 7, 7, + 2, 2, 6, 6, 2, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, 2, 2, 6, 6, 2, 2, 6, 6, 3, 3, 7, 7, 3, 3, 7, 7, + 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 7, 7, 3, 3, 7, 7, 3, 3, 7, 7, 4, 4, 7, 7, 4, 4, 7, 7, + 6, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 6, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 6, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 6, 6, 8, 8, 6, 6, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, 7, 7, 8, 8, + 0, 1, 3, 3, 1, 2, 3, 3, 5, 6, 7, 7, 6, 6, 7, 7, 0, 1, 3, 3, 1, 2, 3, 3, 5, 6, 7, 7, 6, 6, 7, 7, + 5, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 5, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 
7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 2, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, 2, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, + 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 6, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 4, 4, 3, 3, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 0, 3, 1, 4, 3, 6, 4, 7, 1, 4, 2, 5, 4, 7, 5, 7, 0, 3, 1, 4, 3, 6, 4, 7, 1, 4, 2, 5, 4, 7, 5, 7, + 1, 4, 2, 5, 4, 7, 5, 7, 2, 5, 2, 5, 5, 7, 5, 7, 1, 4, 2, 5, 4, 7, 5, 7, 2, 5, 2, 5, 5, 7, 5, 7, + 3, 6, 4, 7, 6, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, 3, 6, 4, 7, 6, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, + 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, + 1, 4, 2, 5, 4, 7, 5, 7, 2, 5, 2, 5, 5, 7, 5, 7, 1, 4, 2, 5, 4, 7, 5, 7, 2, 5, 2, 5, 5, 7, 5, 7, + 2, 5, 2, 5, 5, 7, 5, 7, 2, 5, 2, 5, 5, 7, 5, 7, 2, 5, 2, 5, 5, 7, 5, 7, 2, 5, 2, 5, 5, 7, 5, 7, + 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, + 5, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, + 3, 6, 4, 7, 6, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, 3, 6, 4, 7, 6, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, + 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, + 6, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 6, 8, 7, 8, 8, 8, 8, 
8, 7, 8, 7, 8, 8, 8, 8, 8, + 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, + 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 4, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, + 5, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, 5, 7, 5, 7, 7, 8, 7, 8, + 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, + 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 7, 8, 8, 8, 8, 8 +}; + static const OPJ_BYTE lut_ctxno_sc[256] = { 0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xd, 0xc, 0xd, 0xd, 0xd, 0xd, 0xd, 0x9, 0xa, 0xc, 0xb, 0xa, 0x9, 0xd, 0xc, 0xc, 0xb, 0xc, 0xb, 0xd, 0xc, 0xd, 0xc, @@ -54,6 +121,25 @@ static const OPJ_BYTE lut_ctxno_sc[256] = { 0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xd, 0xc, 0xd, 0xd, 0xd, 0xd, 0xd }; +static const OPJ_BYTE lut_enc_ctxno_sc[256] = { + 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0xc, 0xc, 0xd, 0xb, 0xc, 0xc, 0xd, 0xb, + 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0xc, 0xc, 0xb, 0xd, 0xc, 0xc, 0xb, 0xd, + 0xc, 0xc, 0xd, 0xd, 0xc, 0xc, 0xb, 0xb, 0xc, 0x9, 0xd, 0xa, 0x9, 0xc, 0xa, 0xb, + 0xc, 0xc, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xc, 0x9, 0xb, 0xa, 0x9, 0xc, 0xa, 0xd, + 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0xc, 0xc, 0xd, 0xb, 0xc, 0xc, 0xd, 0xb, + 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0xc, 0xc, 0xb, 0xd, 0xc, 0xc, 0xb, 0xd, + 0xc, 0xc, 0xd, 0xd, 0xc, 0xc, 0xb, 0xb, 0xc, 0x9, 0xd, 0xa, 0x9, 0xc, 0xa, 0xb, + 0xc, 0xc, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xc, 0x9, 0xb, 0xa, 0x9, 0xc, 0xa, 0xd, + 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xd, 0xb, 0xd, 0xb, 0xd, 0xb, 0xd, 0xb, + 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xd, 0xb, 0xc, 0xc, 0xd, 0xb, 0xc, 0xc, + 0xd, 0xd, 0xd, 0xd, 0xb, 0xb, 0xb, 0xb, 0xd, 0xa, 0xd, 0xa, 0xa, 0xb, 0xa, 0xb, + 0xd, 0xd, 0xc, 0xc, 0xb, 0xb, 0xc, 0xc, 0xd, 0xa, 0xc, 0x9, 0xa, 0xb, 0x9, 0xc, + 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xb, 0xd, 0xc, 0xc, 0xb, 0xd, 0xc, 0xc, + 0xa, 0xa, 
0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xb, 0xd, 0xb, 0xd, 0xb, 0xd, 0xb, 0xd, + 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xc, 0xc, 0xb, 0xa, 0xc, 0x9, 0xa, 0xd, 0x9, 0xc, + 0xb, 0xb, 0xb, 0xb, 0xd, 0xd, 0xd, 0xd, 0xb, 0xa, 0xb, 0xa, 0xa, 0xd, 0xa, 0xd +}; + static const OPJ_BYTE lut_spb[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, @@ -65,6 +151,17 @@ static const OPJ_BYTE lut_spb[256] = { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; +static const OPJ_BYTE lut_enc_spb[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, + 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1 +}; + static const OPJ_INT16 lut_nmsedec_sig[1U << T1_NMSEDEC_BITS] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, From 4068363ff5021608180f712823db55d9c0a551e6 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 22 May 2017 18:42:46 +0200 Subject: [PATCH 02/19] T1: fix VSC mode in encoder --- src/lib/openjp2/t1.c | 489 ++++++++++++++++++++++++++++--------------- src/lib/openjp2/t1.h | 11 +- 2 files changed, 329 insertions(+), 171 deletions(-) diff 
--git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 982dbaa6..cfce7499 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -13,6 +13,7 @@ * Copyright (c) 2005, Herve Drolon, FreeImage Team * Copyright (c) 2007, Callum Lerwick * Copyright (c) 2012, Carl Hetherington + * Copyright (c) 2017, IntoPIX SA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -62,22 +63,11 @@ static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); -static void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, - OPJ_UINT32 s, OPJ_UINT32 stride); +static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, + OPJ_UINT32 s, OPJ_UINT32 stride); static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride); -/** -Encode significant pass -*/ -static void opj_t1_enc_sigpass_step(opj_t1_t *t1, - opj_flag_enc_t *flagsp, - OPJ_INT32 *datap, - OPJ_UINT32 orient, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 k); + /** Decode significant pass @@ -132,18 +122,6 @@ static void opj_t1_dec_sigpass_mqc_vsc( -/** -Encode refinement pass -*/ -static void opj_t1_enc_refpass_step(opj_t1_t *t1, - opj_flag_enc_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type); - - /** Encode refinement pass */ @@ -194,22 +172,6 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( OPJ_UINT32 row); - -/** -Encode clean-up pass -*/ -static void opj_t1_enc_clnpass_step( - opj_t1_t *t1, - opj_flag_enc_t *flagsp, - OPJ_INT32 *datap, - OPJ_UINT32 orient, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_UINT32 agg, - OPJ_UINT32 runlen, - OPJ_UINT32 x, - OPJ_UINT32 y); /** Decode clean-up pass */ 
@@ -407,8 +369,8 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) } -static void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, - OPJ_UINT32 s, OPJ_UINT32 stride) +static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, + OPJ_UINT32 s, OPJ_UINT32 stride) { /* set up to point to the north and south data points' flags words, if required */ opj_flag_enc_t* north; @@ -530,61 +492,66 @@ static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, } } - -void opj_t1_enc_sigpass_step(opj_t1_t *t1, - opj_flag_enc_t *flagsp, - OPJ_INT32 *datap, - OPJ_UINT32 orient, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 k - ) +/** +Encode significant pass +*/ +static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, + opj_flag_enc_t *flagsp, + OPJ_INT32 *datap, + OPJ_UINT32 orient, + OPJ_INT32 bpno, + OPJ_INT32 one, + OPJ_INT32 *nmsedec, + OPJ_BYTE type, + OPJ_UINT32 ci, + OPJ_UINT32 vsc + ) { OPJ_INT32 v; - OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - if (*flagsp == 0U) { - /* Nothing to do for any of the 4 data points */ - return; - } + OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | + T1_CHI_S) << (ci * 3U)) : ~0U; + OPJ_UINT32 const flags = *flagsp & vsc_mask; - for (ci = 0U; ci < 4U; ++ci) { - /* XXX:TODO enc_flags_t and vsc mode a la - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; - flag = vsc ? ((*dec_flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*dec_flagsp); - */ - - OPJ_UINT32 const shift_flags = *flagsp >> (ci * 3U); - - if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == 0U && - (shift_flags & T1_SIGMA_NEIGHBOURS) != 0U) { - v = opj_int_abs(*datap) & one ? 
1 : 0; - opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_zc(shift_flags, - orient)); /* ESSAI */ + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && + (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { + OPJ_UINT32 ctxt1 = opj_t1_enc_getctxno_zc(flags >> (ci * 3U), + orient); + v = opj_int_abs(*datap) & one ? 1 : 0; +#ifdef DEBUG_ENC_SIG + fprintf(stderr, " ctxt1=%d\n", ctxt1); +#endif + opj_mqc_setcurctx(mqc, ctxt1); /* ESSAI */ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ + opj_mqc_bypass_enc(mqc, v); + } else { + opj_mqc_encode(mqc, v); + } + if (v) { + OPJ_UINT32 ctxt2 = opj_t1_enc_getctxno_sc(*flagsp & vsc_mask, + flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, + ci); + v = *datap < 0 ? 1 : 0; + *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); +#ifdef DEBUG_ENC_SIG + fprintf(stderr, " ctxt2=%d\n", ctxt2); +#endif + opj_mqc_setcurctx(mqc, ctxt2); /* ESSAI */ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ opj_mqc_bypass_enc(mqc, v); } else { - opj_mqc_encode(mqc, v); + OPJ_UINT32 spb = opj_t1_enc_getspb(*flagsp & vsc_mask, + flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, ci); +#ifdef DEBUG_ENC_SIG + fprintf(stderr, " spb=%d\n", spb); +#endif + opj_mqc_encode(mqc, v ^ spb); } - if (v) { - v = *datap < 0 ? 
1 : 0; - *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); - opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_sc(*flagsp, flagsp[-1], flagsp[1], - ci)); /* ESSAI */ - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - opj_mqc_encode(mqc, v ^ opj_t1_enc_getspb(*flagsp, flagsp[-1], flagsp[1], ci)); - } - opj_t1_enc_updateflags(flagsp, ci, v, t1->flags_stride); - } - *flagsp |= T1_PI_0 << (ci * 3U); + opj_t1_enc_updateflags(flagsp, ci, v, t1->flags_stride); } - datap += t1->data_stride; + *flagsp |= T1_PI_THIS << (ci * 3U); } } @@ -701,26 +668,99 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); opj_flag_enc_t* f = &ENC_FLAGS(0, 0); OPJ_UINT32 const extra = t1->flags_stride - t1->w; - OPJ_INT32* d = t1->data; *nmsedec = 0; - for (k = 0; k < t1->h; k += 4) { +#ifdef DEBUG_ENC_SIG + fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno); +#endif + for (k = 0; k < (t1->h & ~3U); k += 4) { +#ifdef DEBUG_ENC_SIG + fprintf(stderr, " k=%d\n", k); +#endif for (i = 0; i < t1->w; ++i) { +#ifdef DEBUG_ENC_SIG + fprintf(stderr, " i=%d\n", i); +#endif + if (*f == 0U) { + /* Nothing to do for any of the 4 data points */ + f++; + continue; + } opj_t1_enc_sigpass_step( t1, f, - &t1->data[(k * t1->data_stride) + i], + &t1->data[((k + 0) * t1->data_stride) + i], orient, bpno, one, nmsedec, - type, k); - + type, + 0, 0); + opj_t1_enc_sigpass_step( + t1, + f, + &t1->data[((k + 1) * t1->data_stride) + i], + orient, + bpno, + one, + nmsedec, + type, + 1, 0); + opj_t1_enc_sigpass_step( + t1, + f, + &t1->data[((k + 2) * t1->data_stride) + i], + orient, + bpno, + one, + nmsedec, + type, + 2, 0); + opj_t1_enc_sigpass_step( + t1, + f, + &t1->data[((k + 3) * t1->data_stride) + i], + orient, + bpno, + one, + nmsedec, + type, + 3, cblksty & J2K_CCP_CBLKSTY_VSC); ++f; - ++d; } f += extra; } + + if (k < t1->h) { + OPJ_UINT32 j; +#ifdef DEBUG_ENC_SIG + fprintf(stderr, " k=%d\n", k); +#endif + for (i = 0; i 
< t1->w; ++i) { +#ifdef DEBUG_ENC_SIG + fprintf(stderr, " i=%d\n", i); +#endif + if (*f == 0U) { + /* Nothing to do for any of the 4 data points */ + f++; + continue; + } + for (j = k; j < t1->h; ++j) { + opj_t1_enc_sigpass_step( + t1, + f, + &t1->data[(j * t1->data_stride) + i], + orient, + bpno, + one, + nmsedec, + type, + j - k, + (j == t1->h - 1 && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); + } + ++f; + } + } } static void opj_t1_dec_sigpass_raw( @@ -868,49 +908,45 @@ static void opj_t1_dec_sigpass_mqc_vsc( } } /* VSC and BYPASS by Antonin */ -static void opj_t1_enc_refpass_step(opj_t1_t *t1, - opj_flag_enc_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 bpno, - OPJ_INT32 one, - OPJ_INT32 *nmsedec, - OPJ_BYTE type) + +/** +Encode refinement pass step +*/ +static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, + opj_flag_enc_t *flagsp, + OPJ_INT32 *datap, + OPJ_INT32 bpno, + OPJ_INT32 one, + OPJ_INT32 *nmsedec, + OPJ_BYTE type, + OPJ_UINT32 ci, + OPJ_UINT32 vsc + ) { OPJ_INT32 v; - OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - if ((*flagsp & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { - /* none significant */ - return; - } - if ((*flagsp & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == - (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) { - /* all processed by sigpass */ - return; - } + OPJ_UINT32 const shift_flags = + vsc ? + ((*flagsp >> (ci * 3U)) & ~(T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S)) + : + (*flagsp >> (ci * 3U)); - for (ci = 0U; ci < 4U; ++ci) { - - /* XXX:TODO vsc mode as per - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; - flag = vsc ? ((*dec_flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*dec_flagsp); - */ - - OPJ_UINT32 shift_flags = *flagsp >> (ci * 3U); - if ((shift_flags & (T1_SIGMA_4 | T1_PI_0)) == T1_SIGMA_4) { - *nmsedec += opj_t1_getnmsedec_ref(opj_int_abs(*datap), bpno); - v = opj_int_abs(*datap) & one ? 
1 : 0; - opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_mag(shift_flags)); /* ESSAI */ - if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ - opj_mqc_bypass_enc(mqc, v); - } else { - opj_mqc_encode(mqc, v); - } - *flagsp |= T1_MU_0 << (ci * 3U); + if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { + OPJ_UINT32 ctxt = opj_t1_enc_getctxno_mag(shift_flags); + *nmsedec += opj_t1_getnmsedec_ref(opj_int_abs(*datap), bpno); + v = opj_int_abs(*datap) & one ? 1 : 0; +#ifdef DEBUG_ENC_REF + fprintf(stderr, " ctxt=%d\n", ctxt); +#endif + opj_mqc_setcurctx(mqc, ctxt); /* ESSAI */ + if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ + opj_mqc_bypass_enc(mqc, v); + } else { + opj_mqc_encode(mqc, v); } - datap += t1->data_stride; + *flagsp |= T1_MU_THIS << (ci * 3U); } } @@ -1023,20 +1059,99 @@ static void opj_t1_enc_refpass( const OPJ_UINT32 extra = t1->flags_stride - t1->w; *nmsedec = 0; - for (k = 0U; k < t1->h; k += 4U) { - for (i = 0U; i < t1->w; ++i) { +#ifdef DEBUG_ENC_REF + fprintf(stderr, "enc_refpass: bpno=%d\n", bpno); +#endif + for (k = 0; k < (t1->h & ~3U); k += 4) { +#ifdef DEBUG_ENC_REF + fprintf(stderr, " k=%d\n", k); +#endif + for (i = 0; i < t1->w; ++i) { +#ifdef DEBUG_ENC_REF + fprintf(stderr, " i=%d\n", i); +#endif + if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { + /* none significant */ + f++; + continue; + } + if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == + (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) { + /* all processed by sigpass */ + f++; + continue; + } + opj_t1_enc_refpass_step( t1, f, - &t1->data[(k * t1->data_stride) + i], + &t1->data[((k + 0) * t1->data_stride) + i], bpno, one, nmsedec, - type); + type, + 0, 0); + opj_t1_enc_refpass_step( + t1, + f, + &t1->data[((k + 1) * t1->data_stride) + i], + bpno, + one, + nmsedec, + type, + 1, 0); + opj_t1_enc_refpass_step( + t1, + f, + &t1->data[((k + 2) * t1->data_stride) + i], + bpno, + one, + nmsedec, + type, + 2, 0); + opj_t1_enc_refpass_step( + t1, + f, + 
&t1->data[((k + 3) * t1->data_stride) + i], + bpno, + one, + nmsedec, + type, + 3, cblksty & J2K_CCP_CBLKSTY_VSC); ++f; } f += extra; } + + if (k < t1->h) { + OPJ_UINT32 j; +#ifdef DEBUG_ENC_REF + fprintf(stderr, " k=%d\n", k); +#endif + for (i = 0; i < t1->w; ++i) { +#ifdef DEBUG_ENC_REF + fprintf(stderr, " i=%d\n", i); +#endif + if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { + /* none significant */ + f++; + continue; + } + for (j = k; j < t1->h; ++j) { + opj_t1_enc_refpass_step( + t1, + f, + &t1->data[(j * t1->data_stride) + i], + bpno, + one, + nmsedec, + type, + j - k, + (j == t1->h - 1 && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); + } + ++f; + } + } } @@ -1181,6 +1296,9 @@ static void opj_t1_dec_refpass_mqc_vsc( } } /* VSC and BYPASS by Antonin */ +/** +Encode clean-up pass step +*/ static void opj_t1_enc_clnpass_step( opj_t1_t *t1, opj_flag_enc_t *flagsp, @@ -1192,59 +1310,79 @@ static void opj_t1_enc_clnpass_step( OPJ_UINT32 agg, OPJ_UINT32 runlen, OPJ_UINT32 x, - OPJ_UINT32 y) + OPJ_UINT32 y, + OPJ_UINT32 cblksty) { OPJ_INT32 v; OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ OPJ_UINT32 lim; - const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | - T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + if ((cblksty & J2K_CCP_CBLKSTY_VSC) == 0) { + const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | + T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - if ((*flagsp & check) == check) { - if (runlen == 0) { - *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 1) { - *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 2) { - *flagsp &= ~(T1_PI_2 | T1_PI_3); - } else if (runlen == 3) { - *flagsp &= ~(T1_PI_3); + if ((*flagsp & check) == check) { + if (runlen == 0) { + *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + } else if (runlen == 1) { + *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); + } else if (runlen == 2) { + *flagsp &= ~(T1_PI_2 | T1_PI_3); + 
} else if (runlen == 3) { + *flagsp &= ~(T1_PI_3); + } + return; } - return; } lim = 4U < (t1->h - y) ? 4U : (t1->h - y); for (ci = runlen; ci < lim; ++ci) { - opj_flag_enc_t shift_flags; + OPJ_UINT32 vsc; + OPJ_UINT32 vsc_mask; + opj_flag_enc_t flags; - /* XXX:TODO vsc mode a la - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; - flag = vsc ? ((*dec_flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*dec_flagsp); - */ + vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == lim - 1)) ? 1 : 0; + vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << + (ci * 3U)) : ~0U; if ((agg != 0) && (ci == runlen)) { goto LABEL_PARTIAL; } - shift_flags = *flagsp >> (ci * 3U); + flags = *flagsp & vsc_mask; - if (!(shift_flags & (T1_SIGMA_4 | T1_PI_0))) { - opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_zc(shift_flags, orient)); + if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { + OPJ_UINT32 ctxt1 = opj_t1_enc_getctxno_zc(flags >> (ci * 3U), orient); +#ifdef DEBUG_ENC_CLN + printf(" ctxt1=%d\n", ctxt1); +#endif + opj_mqc_setcurctx(mqc, ctxt1); v = opj_int_abs(*datap) & one ? 1 : 0; opj_mqc_encode(mqc, v); if (v) { + OPJ_UINT32 ctxt2, spb; LABEL_PARTIAL: *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); - opj_mqc_setcurctx(mqc, opj_t1_enc_getctxno_sc(*flagsp, flagsp[-1], flagsp[1], - ci)); + ctxt2 = opj_t1_enc_getctxno_sc(*flagsp & vsc_mask, + flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, + ci); +#ifdef DEBUG_ENC_CLN + printf(" ctxt2=%d\n", ctxt2); +#endif + opj_mqc_setcurctx(mqc, ctxt2); + v = *datap < 0 ? 
1 : 0; - opj_mqc_encode(mqc, v ^ opj_t1_enc_getspb(*flagsp, flagsp[-1], flagsp[1], ci)); + spb = opj_t1_enc_getspb(*flagsp & vsc_mask, + flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, ci); +#ifdef DEBUG_ENC_CLN + printf(" spb=%d\n", spb); +#endif + opj_mqc_encode(mqc, v ^ spb); opj_t1_enc_updateflags(flagsp, ci, v, t1->flags_stride); } } - *flagsp &= ~(T1_PI_0 << (3U * ci)); + *flagsp &= ~(T1_PI_THIS << (3U * ci)); datap += t1->data_stride; } } @@ -1383,20 +1521,30 @@ static void opj_t1_enc_clnpass( opj_mqc_t *mqc = t1->mqc; /* MQC component */ *nmsedec = 0; - +#ifdef DEBUG_ENC_CLN + printf("enc_clnpass: bpno=%d\n", bpno); +#endif for (k = 0; k < t1->h; k += 4) { +#ifdef DEBUG_ENC_CLN + printf(" k=%d\n", k); +#endif for (i = 0; i < t1->w; ++i) { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - /* XXX:TODO this needs implementing */ - agg = !(MACRO_t1_enc_flags(1 + k, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || MACRO_t1_enc_flags(1 + k + 1, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || MACRO_t1_enc_flags(1 + k + 2, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || (MACRO_t1_enc_flags(1 + k + 3, 1 + i) - & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | - T1_SIG_OTH)); +#ifdef DEBUG_ENC_CLN + printf(" i=%d\n", i); +#endif + if (k + 3 < t1->h) { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + agg = !(ENC_FLAGS(i, k) & ~(T1_SIGMA_15 | T1_SIGMA_16 | T1_SIGMA_17 | + T1_CHI_5)); + } else { + agg = !ENC_FLAGS(i, k); + } } else { - agg = !ENC_FLAGS(i, k); + agg = 0; } +#ifdef DEBUG_ENC_CLN + printf(" agg=%d\n", agg); +#endif if (agg) { for (runlen = 0; runlen < 4; ++runlen) { if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) { @@ -1425,7 +1573,8 @@ static void opj_t1_enc_clnpass( agg, runlen, i, - k); + k, + cblksty); } } } diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index f5ce9108..1525dd9d 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -12,6 +12,7 @@ * Copyright (c) 2003-2014, Antonin Descampe * Copyright (c) 2005, 
Herve Drolon, FreeImage Team * Copyright (c) 2012, Carl Hetherington + * Copyright (c) 2017, IntoPIX SA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -122,6 +123,14 @@ in T1.C are used by some function in TCD.C. * These #defines declare the layout of a 32-bit flags word. * * This is currently done for encoding only. + * The values must NOT be changed, otherwise this is going to break a lot of + * assumptions. + */ + +/* SIGMA: significance state (3 cols x 6 rows) + * CHI: state for negative sample value (1 col x 6 rows) + * MU: state for visited in refinement pass (1 col x 4 rows) + * PI: state for visited in significance pass (1 col * 4 rows) */ #define T1_SIGMA_0 (1U << 0) @@ -161,7 +170,6 @@ in T1.C are used by some function in TCD.C. #define T1_PI_3 (1U << 30) #define T1_CHI_5 (1U << 31) - /** As an example, the bits T1_SIGMA_3, T1_SIGMA_4 and T1_SIGMA_5 * indicate the significance state of the west neighbour of data point zero * of our four, the point itself, and its east neighbour respectively. @@ -197,6 +205,7 @@ in T1.C are used by some function in TCD.C. 
#define T1_CHI_THIS_I T1_CHI_1_I #define T1_MU_THIS T1_MU_0 #define T1_PI_THIS T1_PI_0 +#define T1_CHI_S T1_CHI_2 #define T1_LUT_SGN_W (1U << 0) #define T1_LUT_SIG_N (1U << 1) From c76a59213100bbec567a0c78b7b1b8ba82d1e1fa Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 22 May 2017 23:30:30 +0200 Subject: [PATCH 03/19] T1: remove unused code in decoder --- src/lib/openjp2/t1.c | 106 ++++++++----------------------------------- src/lib/openjp2/t1.h | 4 +- 2 files changed, 22 insertions(+), 88 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index cfce7499..400bf000 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -57,7 +57,6 @@ static OPJ_BYTE opj_t1_enc_getctxno_sc(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, OPJ_UINT32 ci); static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f); static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f); -static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f); static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, OPJ_UINT32 ci); static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); @@ -309,13 +308,6 @@ static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f) return tmp2; } -static OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f) -{ - OPJ_UINT32 tmp1 = (f & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; - OPJ_UINT32 tmp2 = (f & T1_REFINE) ? 
T1_CTXNO_MAG + 2 : tmp1; - return (tmp2); -} - static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, OPJ_UINT32 ci) { @@ -441,7 +433,9 @@ static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, np[1] |= T1_SIG_SW; flagsp[-1] |= (opj_flag_t)(flag_N << 1U); +#ifdef CONSISTENCY_CHECK flagsp[0] |= T1_SIG; +#endif flagsp[1] |= (opj_flag_t)(flag_N << 3U); sp[-1] |= T1_SIG_NE; @@ -1582,6 +1576,14 @@ static void opj_t1_enc_clnpass( #define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)] +#ifdef CONSISTENCY_CHECK +#define REMOVE_VISIT_FLAG(flagptr) *(flagptr) &= (opj_flag_t)~T1_VISIT +#define T1_SIG_OR_VISIT (T1_SIG | T1_VISIT) +#else +#define REMOVE_VISIT_FLAG(flagptr) do {} while(0) +#define T1_SIG_OR_VISIT 0 +#endif + #define opj_t1_dec_clnpass_internal(consistency_check, t1, bpno, cblksty, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \ @@ -1652,7 +1654,7 @@ static void opj_t1_enc_clnpass( assert( agg == !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \ MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \ MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \ - MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)) ); \ + MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG_OR_VISIT | T1_SIG_OTH)) ); \ } \ if (agg) { \ opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ @@ -1675,32 +1677,32 @@ static void opj_t1_enc_clnpass( } \ } else { \ flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0U, 
flags_stride); \ } \ - if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ + if( consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \ } \ - if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ + if( consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \ } \ - if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ + if( consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \ } \ - if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ + if( 
consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ } \ *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ @@ -2642,73 +2644,3 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate); } } - -#if 0 -static void opj_t1_dec_refpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 poshalf, - OPJ_INT32 neghalf, - OPJ_BYTE type, - OPJ_UINT32 vsc) -{ - OPJ_INT32 t; - OPJ_UINT32 v, flag; - - opj_mqc_t *mqc = t1->mqc; /* MQC component */ - opj_raw_t *raw = t1->raw; /* RAW component */ - - flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : - (*flagsp); - if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag(flag)); /* ESSAI */ - if (type == T1_TYPE_RAW) { - v = opj_raw_decode(raw); - } else { - v = opj_mqc_decode(mqc); - } - t = v ? poshalf : neghalf; - *datap += *datap < 0 ? -t : t; - *flagsp |= T1_REFINE; - } -} /* VSC and BYPASS by Antonin */ -#endif - - - -#if 0 -static void opj_t1_dec_sigpass_step(opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_UINT32 orient, - OPJ_INT32 oneplushalf, - OPJ_BYTE type, - OPJ_UINT32 vsc) -{ - OPJ_UINT32 v, flag; - - opj_raw_t *raw = t1->raw; /* RAW component */ - opj_mqc_t *mqc = t1->mqc; /* MQC component */ - - flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : - (*flagsp); - if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { - if (type == T1_TYPE_RAW) { - if (opj_raw_decode(raw)) { - v = opj_raw_decode(raw); /* ESSAI */ - *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, v, t1->flags_stride); - } - } else { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(flag, orient)); - if (opj_mqc_decode(mqc)) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb(flag); - *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, v, t1->flags_stride); - } - } - *flagsp |= T1_VISIT; - } -} /* VSC and BYPASS by Antonin */ -#endif diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 1525dd9d..59c2d016 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -72,9 +72,11 @@ in T1.C are used by some function in TCD.C. #define T1_SGN_W 0x0800U #define T1_SGN (T1_SGN_N|T1_SGN_E|T1_SGN_S|T1_SGN_W) +#ifdef CONSISTENCY_CHECK #define T1_SIG 0x1000U /**< No longer used by decoder */ -#define T1_REFINE 0x2000U /**< No longer used by decoder */ #define T1_VISIT 0x4000U /**< No longer used by decoder */ +#endif + /* END of flags that apply to opj_flag_t */ #define T1_NUMCTXS_ZC 9 From d6907b93044108aacc6ab9281a7a9ab48546824c Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 23 May 2017 00:40:30 +0200 Subject: [PATCH 04/19] Optimize a bit opj_t1_enc_clnpass() --- src/lib/openjp2/t1.c | 76 ++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 400bf000..93e274d8 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1303,34 +1303,29 @@ static void opj_t1_enc_clnpass_step( OPJ_INT32 *nmsedec, OPJ_UINT32 agg, OPJ_UINT32 runlen, - OPJ_UINT32 x, - OPJ_UINT32 y, + OPJ_UINT32 lim, OPJ_UINT32 cblksty) { OPJ_INT32 v; OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - OPJ_UINT32 lim; - if ((cblksty & J2K_CCP_CBLKSTY_VSC) == 0) { - const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | - T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | + T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - if ((*flagsp & check) == check) { - if (runlen == 0) { - *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 1) { - *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); - } else if (runlen == 2) { - *flagsp &= ~(T1_PI_2 | T1_PI_3); 
- } else if (runlen == 3) { - *flagsp &= ~(T1_PI_3); - } - return; + if ((*flagsp & check) == check) { + if (runlen == 0) { + *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); + } else if (runlen == 1) { + *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); + } else if (runlen == 2) { + *flagsp &= ~(T1_PI_2 | T1_PI_3); + } else if (runlen == 3) { + *flagsp &= ~(T1_PI_3); } + return; } - lim = 4U < (t1->h - y) ? 4U : (t1->h - y); for (ci = runlen; ci < lim; ++ci) { OPJ_UINT32 vsc; OPJ_UINT32 vsc_mask; @@ -1514,11 +1509,15 @@ static void opj_t1_enc_clnpass( opj_mqc_t *mqc = t1->mqc; /* MQC component */ + const OPJ_UINT32 agg_mask = (cblksty & J2K_CCP_CBLKSTY_VSC) ? + ~(T1_SIGMA_15 | T1_SIGMA_16 | T1_SIGMA_17 | T1_CHI_5) : + ~0U; + *nmsedec = 0; #ifdef DEBUG_ENC_CLN printf("enc_clnpass: bpno=%d\n", bpno); #endif - for (k = 0; k < t1->h; k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4) { #ifdef DEBUG_ENC_CLN printf(" k=%d\n", k); #endif @@ -1526,16 +1525,7 @@ static void opj_t1_enc_clnpass( #ifdef DEBUG_ENC_CLN printf(" i=%d\n", i); #endif - if (k + 3 < t1->h) { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - agg = !(ENC_FLAGS(i, k) & ~(T1_SIGMA_15 | T1_SIGMA_16 | T1_SIGMA_17 | - T1_CHI_5)); - } else { - agg = !ENC_FLAGS(i, k); - } - } else { - agg = 0; - } + agg = !(ENC_FLAGS(i, k) & agg_mask); #ifdef DEBUG_ENC_CLN printf(" agg=%d\n", agg); #endif @@ -1566,8 +1556,32 @@ static void opj_t1_enc_clnpass( nmsedec, agg, runlen, - i, - k, + 4U, + cblksty); + } + } + if (k < t1->h) { + agg = 0; + runlen = 0; +#ifdef DEBUG_ENC_CLN + printf(" k=%d\n", k); +#endif + for (i = 0; i < t1->w; ++i) { +#ifdef DEBUG_ENC_CLN + printf(" i=%d\n", i); + printf(" agg=%d\n", agg); +#endif + opj_t1_enc_clnpass_step( + t1, + &ENC_FLAGS(i, k), + &t1->data[((k + runlen) * t1->data_stride) + i], + orient, + bpno, + one, + nmsedec, + agg, + runlen, + t1->h - k, cblksty); } } From 40c0f42def262ed76e2f0048d09aab9e0ee1af02 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 23 May 2017 13:02:24 +0200 
Subject: [PATCH 05/19] Factor index computation for lut_enc_ctxno_sc and lut_enc_spb --- src/lib/openjp2/t1.c | 113 ++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 65 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 93e274d8..e660d0ac 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -53,13 +53,9 @@ static INLINE OPJ_BYTE opj_t1_enc_getctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient); static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f); -static OPJ_BYTE opj_t1_enc_getctxno_sc(OPJ_UINT32 fX, OPJ_UINT32 pfX, - OPJ_UINT32 nfX, OPJ_UINT32 ci); -static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f); +static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f); static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f); -static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, - OPJ_UINT32 ci); -static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); +static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, @@ -266,50 +262,10 @@ static OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f) return mqc->lut_ctxno_zc_orient[(f & T1_SIG_OTH)]; } - -static OPJ_BYTE opj_t1_enc_getctxno_sc(OPJ_UINT32 fX, OPJ_UINT32 pfX, - OPJ_UINT32 nfX, OPJ_UINT32 ci) -{ - /* - 0 pfX T1_CHI_THIS T1_LUT_CTXNO_SGN_W - 1 tfX T1_SIGMA_1 T1_LUT_CTXNO_SIG_N - 2 nfX T1_CHI_THIS T1_LUT_CTXNO_SGN_E - 3 tfX T1_SIGMA_3 T1_LUT_CTXNO_SIG_W - 4 fX T1_CHI_(THIS - 1) T1_LUT_CTXNO_SGN_N - 5 tfX T1_SIGMA_5 T1_LUT_CTXNO_SIG_E - 6 fX T1_CHI_(THIS + 1) T1_LUT_CTXNO_SGN_S - 7 tfX T1_SIGMA_7 T1_LUT_CTXNO_SIG_S - */ - - OPJ_UINT32 lu = (fX >> (ci * 3)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 | - T1_SIGMA_7); - - lu |= (pfX >> (T1_CHI_THIS_I + (ci * 3U))) & (1U << 0); - lu |= (nfX >> (T1_CHI_THIS_I - 2U + 
(ci * 3U))) & (1U << 2); - if (ci == 0U) { - lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4); - } else { - lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4); - } - lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6); - - return lut_enc_ctxno_sc[lu]; -} - -static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f) -{ - return lut_ctxno_sc[(f & (T1_SIG_PRIM | T1_SGN)) >> 4]; -} - -static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f) -{ - OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; - OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp; - return tmp2; -} - -static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, - OPJ_UINT32 ci) +static INLINE OPJ_UINT32 opj_t1_enc_getctxtno_sc_or_spb_index(OPJ_UINT32 fX, + OPJ_UINT32 pfX, + OPJ_UINT32 nfX, + OPJ_UINT32 ci) { /* 0 pfX T1_CHI_THIS T1_LUT_SGN_W @@ -322,8 +278,8 @@ static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, 7 tfX T1_SIGMA_7 T1_LUT_SIG_S */ - int lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 | - T1_SIGMA_7); + OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 | + T1_SIGMA_7); lu |= (pfX >> (T1_CHI_THIS_I + (ci * 3U))) & (1U << 0); lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2); @@ -333,11 +289,32 @@ static OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4); } lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6); + return lu; +} +static INLINE OPJ_BYTE opj_t1_enc_getctxno_sc(OPJ_UINT32 lu) +{ + return lut_enc_ctxno_sc[lu]; +} + +static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f) +{ + return lut_ctxno_sc[(f & (T1_SIG_PRIM | T1_SGN)) >> 4]; +} + +static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f) +{ + OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; + OPJ_UINT32 tmp2 = (f & T1_MU_0) ? 
T1_CTXNO_MAG + 2 : tmp; + return tmp2; +} + +static INLINE OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 lu) +{ return lut_enc_spb[lu]; } -static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f) +static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f) { return lut_spb[(f & (T1_SIG_PRIM | T1_SGN)) >> 4]; } @@ -524,9 +501,13 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, opj_mqc_encode(mqc, v); } if (v) { - OPJ_UINT32 ctxt2 = opj_t1_enc_getctxno_sc(*flagsp & vsc_mask, - flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, - ci); + /* Note: using flags instead of *flagsp & vsc_mask result */ + /* in slow down. Probably because of register pressure */ + OPJ_UINT32 lu = opj_t1_enc_getctxtno_sc_or_spb_index( + *flagsp & vsc_mask, + flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, + ci); + OPJ_UINT32 ctxt2 = opj_t1_enc_getctxno_sc(lu); v = *datap < 0 ? 1 : 0; *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); #ifdef DEBUG_ENC_SIG @@ -536,8 +517,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ opj_mqc_bypass_enc(mqc, v); } else { - OPJ_UINT32 spb = opj_t1_enc_getspb(*flagsp & vsc_mask, - flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, ci); + OPJ_UINT32 spb = opj_t1_enc_getspb(lu); #ifdef DEBUG_ENC_SIG fprintf(stderr, " spb=%d\n", spb); #endif @@ -1334,13 +1314,12 @@ static void opj_t1_enc_clnpass_step( vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == lim - 1)) ? 1 : 0; vsc_mask = vsc ? 
~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << (ci * 3U)) : ~0U; + flags = *flagsp & vsc_mask; if ((agg != 0) && (ci == runlen)) { goto LABEL_PARTIAL; } - flags = *flagsp & vsc_mask; - if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { OPJ_UINT32 ctxt1 = opj_t1_enc_getctxno_zc(flags >> (ci * 3U), orient); #ifdef DEBUG_ENC_CLN @@ -1351,19 +1330,23 @@ static void opj_t1_enc_clnpass_step( opj_mqc_encode(mqc, v); if (v) { OPJ_UINT32 ctxt2, spb; + OPJ_UINT32 lu; LABEL_PARTIAL: + /* Note: using flags instead of *flagsp & vsc_mask result */ + /* in slow down. Probably because of register pressure */ + lu = opj_t1_enc_getctxtno_sc_or_spb_index( + *flagsp & vsc_mask, + flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, + ci); *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); - ctxt2 = opj_t1_enc_getctxno_sc(*flagsp & vsc_mask, - flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, - ci); + ctxt2 = opj_t1_enc_getctxno_sc(lu); #ifdef DEBUG_ENC_CLN printf(" ctxt2=%d\n", ctxt2); #endif opj_mqc_setcurctx(mqc, ctxt2); v = *datap < 0 ? 
1 : 0; - spb = opj_t1_enc_getspb(*flagsp & vsc_mask, - flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, ci); + spb = opj_t1_enc_getspb(lu); #ifdef DEBUG_ENC_CLN printf(" spb=%d\n", spb); #endif From 1957a498b68af12bfa8d382f96e54f8403bb2fb6 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 23 May 2017 14:55:45 +0200 Subject: [PATCH 06/19] Fix compiler warnings --- src/lib/openjp2/t1.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index e660d0ac..6f46bfa8 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -374,6 +374,7 @@ static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, switch (ci) { case 0U: { *flagsp |= T1_CHI_1; + north = flagsp - stride; *north |= T1_CHI_5; break; } @@ -385,6 +386,7 @@ static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, break; case 3: { *flagsp |= T1_CHI_4; + south = flagsp + stride; *south |= T1_CHI_0; break; } @@ -478,7 +480,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, OPJ_UINT32 vsc ) { - OPJ_INT32 v; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ @@ -509,7 +511,8 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, ci); OPJ_UINT32 ctxt2 = opj_t1_enc_getctxno_sc(lu); v = *datap < 0 ? 
1 : 0; - *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); + *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), + (OPJ_UINT32)bpno); #ifdef DEBUG_ENC_SIG fprintf(stderr, " ctxt2=%d\n", ctxt2); #endif @@ -897,7 +900,7 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, OPJ_UINT32 vsc ) { - OPJ_INT32 v; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ @@ -909,7 +912,8 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { OPJ_UINT32 ctxt = opj_t1_enc_getctxno_mag(shift_flags); - *nmsedec += opj_t1_getnmsedec_ref(opj_int_abs(*datap), bpno); + *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), + (OPJ_UINT32)bpno); v = opj_int_abs(*datap) & one ? 1 : 0; #ifdef DEBUG_ENC_REF fprintf(stderr, " ctxt=%d\n", ctxt); @@ -1286,7 +1290,7 @@ static void opj_t1_enc_clnpass_step( OPJ_UINT32 lim, OPJ_UINT32 cblksty) { - OPJ_INT32 v; + OPJ_UINT32 v; OPJ_UINT32 ci; opj_mqc_t *mqc = t1->mqc; /* MQC component */ @@ -1338,7 +1342,8 @@ LABEL_PARTIAL: *flagsp & vsc_mask, flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, ci); - *nmsedec += opj_t1_getnmsedec_sig(opj_int_abs(*datap), bpno); + *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), + (OPJ_UINT32)bpno); ctxt2 = opj_t1_enc_getctxno_sc(lu); #ifdef DEBUG_ENC_CLN printf(" ctxt2=%d\n", ctxt2); @@ -1873,6 +1878,8 @@ static OPJ_BOOL opj_t1_allocate_buffers( if (!t1->encoder) { + size_t colflags_size; + if (flagssize > (size_t)t1->flagssize) { /* Overflow check */ if (flagssize > (SIZE_MAX / sizeof(opj_flag_t))) { @@ -1898,8 +1905,8 @@ static OPJ_BOOL opj_t1_allocate_buffers( memset(t1->flags, 0, flagssize * sizeof(opj_flag_t)); - size_t colflags_size = ((((size_t)h + 3U) / 4U) + - 2U); /* Can't overflow, h checked against UINT32_MAX - 3U */ + colflags_size = ((((size_t)h + 3U) / 4U) + + 2U); /* Can't overflow, h checked against UINT32_MAX - 3U */ /* Overflow check */ if (colflags_size > 
(SIZE_MAX / (size_t)t1->flags_stride)) { From 68557ff5036e68112bcbf194d2a7f48b1f58b752 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 31 May 2017 14:35:56 +0200 Subject: [PATCH 07/19] T1: Transpose coder optimizations to decoder, and cleanup code --- src/lib/openjp2/mqc.h | 5 +- src/lib/openjp2/mqc_inl.h | 22 +- src/lib/openjp2/t1.c | 906 ++++++++++------------------- src/lib/openjp2/t1.h | 72 +-- src/lib/openjp2/t1_generate_luts.c | 224 +------ src/lib/openjp2/t1_luts.h | 67 +-- 6 files changed, 328 insertions(+), 968 deletions(-) diff --git a/src/lib/openjp2/mqc.h b/src/lib/openjp2/mqc.h index f21d46ef..4a3e3c88 100644 --- a/src/lib/openjp2/mqc.h +++ b/src/lib/openjp2/mqc.h @@ -77,7 +77,8 @@ typedef struct opj_mqc { OPJ_BYTE *end; opj_mqc_state_t *ctxs[MQC_NUMCTXS]; opj_mqc_state_t **curctx; - const OPJ_BYTE *lut_ctxno_zc_orient; /* lut_ctxno_zc shifted by 256 * bandno */ + /* lut_ctxno_zc shifted by (1 << 9) * bandno */ + const OPJ_BYTE* lut_ctxno_zc_orient; } opj_mqc_t; #include "mqc_inl.h" @@ -199,7 +200,7 @@ Decode a symbol @param mqc MQC handle @return Returns the decoded symbol (0 or 1) */ -static INLINE OPJ_INT32 opj_mqc_decode(opj_mqc_t * const mqc); +static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t * const mqc); /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/src/lib/openjp2/mqc_inl.h b/src/lib/openjp2/mqc_inl.h index ca3991d7..451ce02d 100644 --- a/src/lib/openjp2/mqc_inl.h +++ b/src/lib/openjp2/mqc_inl.h @@ -43,14 +43,14 @@ FIXME DOC @param mqc MQC handle @return */ -static INLINE OPJ_INT32 opj_mqc_mpsexchange(opj_mqc_t *const mqc) +static INLINE OPJ_UINT32 opj_mqc_mpsexchange(opj_mqc_t *const mqc) { - OPJ_INT32 d; + OPJ_UINT32 d; if (mqc->a < (*mqc->curctx)->qeval) { - d = (OPJ_INT32)(1 - (*mqc->curctx)->mps); + d = !((*mqc->curctx)->mps); *mqc->curctx = (*mqc->curctx)->nlps; } else { - d = (OPJ_INT32)(*mqc->curctx)->mps; + d = (*mqc->curctx)->mps; *mqc->curctx = (*mqc->curctx)->nmps; } @@ 
-62,16 +62,16 @@ FIXME DOC @param mqc MQC handle @return */ -static INLINE OPJ_INT32 opj_mqc_lpsexchange(opj_mqc_t *const mqc) +static INLINE OPJ_UINT32 opj_mqc_lpsexchange(opj_mqc_t *const mqc) { - OPJ_INT32 d; + OPJ_UINT32 d; if (mqc->a < (*mqc->curctx)->qeval) { mqc->a = (*mqc->curctx)->qeval; - d = (OPJ_INT32)(*mqc->curctx)->mps; + d = (*mqc->curctx)->mps; *mqc->curctx = (*mqc->curctx)->nmps; } else { mqc->a = (*mqc->curctx)->qeval; - d = (OPJ_INT32)(1 - (*mqc->curctx)->mps); + d = !((*mqc->curctx)->mps); *mqc->curctx = (*mqc->curctx)->nlps; } @@ -136,13 +136,13 @@ Decode a symbol @param mqc MQC handle @return Returns the decoded symbol (0 or 1) */ -static INLINE OPJ_INT32 opj_mqc_decode(opj_mqc_t *const mqc) +static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t *const mqc) { /* Implements ISO 15444-1 C.3.2 Decoding a decision (DECODE) */ /* Note: alternate "J.2 - Decoding an MPS or an LPS in the */ /* software-conventions decoder" has been tried, but does not bring any */ /* improvement. 
See https://github.com/uclouvain/openjpeg/issues/921 */ - OPJ_INT32 d; + OPJ_UINT32 d; mqc->a -= (*mqc->curctx)->qeval; if ((mqc->c >> 16) < (*mqc->curctx)->qeval) { d = opj_mqc_lpsexchange(mqc); @@ -153,7 +153,7 @@ static INLINE OPJ_INT32 opj_mqc_decode(opj_mqc_t *const mqc) d = opj_mqc_mpsexchange(mqc); opj_mqc_renormd(mqc); } else { - d = (OPJ_INT32)(*mqc->curctx)->mps; + d = (*mqc->curctx)->mps; } } diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 6f46bfa8..bfff8503 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -41,27 +41,20 @@ #include "opj_includes.h" #include "t1_luts.h" -/* #define CONSISTENCY_CHECK */ - /** @defgroup T1 T1 - Implementation of the tier-1 coding */ /*@{*/ -#define ENC_FLAGS(x, y) (t1->enc_flags[x + 1 + ((y / 4) + 1) * t1->flags_stride]) +#define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * t1->flags_stride]) /** @name Local static functions */ /*@{*/ -static INLINE OPJ_BYTE opj_t1_enc_getctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient); static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f); -static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f); -static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f); -static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); +static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f); static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); -static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, - OPJ_UINT32 s, OPJ_UINT32 stride); -static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, - OPJ_UINT32 stride); +static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, + OPJ_UINT32 s, OPJ_UINT32 stride); /** @@ -71,7 +64,6 @@ Decode significant pass static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, @@ 
-79,7 +71,6 @@ static INLINE void opj_t1_dec_sigpass_step_raw( static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_UINT32 row, @@ -87,7 +78,6 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, @@ -99,7 +89,6 @@ Encode significant pass */ static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 cblksty); @@ -143,7 +132,7 @@ Decode refinement pass static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, - opj_colflag_t *colflagsp, + opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, @@ -151,7 +140,6 @@ static INLINE void opj_t1_dec_refpass_step_raw( static INLINE void opj_t1_dec_refpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, @@ -159,7 +147,6 @@ static INLINE void opj_t1_dec_refpass_step_mqc( static INLINE void opj_t1_dec_refpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, @@ -173,21 +160,18 @@ Decode clean-up pass static void opj_t1_dec_clnpass_step_partial( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_UINT32 row); static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_UINT32 row); static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 partial, @@ -199,7 +183,6 @@ Encode clean-up pass static void opj_t1_enc_clnpass( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 
*nmsedec, OPJ_UINT32 cblksty); @@ -252,17 +235,12 @@ static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1, /* ----------------------------------------------------------------------- */ -static INLINE OPJ_BYTE opj_t1_enc_getctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient) +static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f) { - return lut_enc_ctxno_zc[(orient << 9) | (f & T1_SIGMA_NEIGHBOURS)]; + return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)]; } -static OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f) -{ - return mqc->lut_ctxno_zc_orient[(f & T1_SIG_OTH)]; -} - -static INLINE OPJ_UINT32 opj_t1_enc_getctxtno_sc_or_spb_index(OPJ_UINT32 fX, +static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX, OPJ_UINT32 pfX, OPJ_UINT32 nfX, OPJ_UINT32 ci) @@ -292,31 +270,21 @@ static INLINE OPJ_UINT32 opj_t1_enc_getctxtno_sc_or_spb_index(OPJ_UINT32 fX, return lu; } -static INLINE OPJ_BYTE opj_t1_enc_getctxno_sc(OPJ_UINT32 lu) +static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu) { - return lut_enc_ctxno_sc[lu]; + return lut_ctxno_sc[lu]; } -static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f) -{ - return lut_ctxno_sc[(f & (T1_SIG_PRIM | T1_SGN)) >> 4]; -} - -static INLINE OPJ_UINT32 opj_t1_enc_getctxno_mag(OPJ_UINT32 f) +static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f) { OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; OPJ_UINT32 tmp2 = (f & T1_MU_0) ? 
T1_CTXNO_MAG + 2 : tmp; return tmp2; } -static INLINE OPJ_BYTE opj_t1_enc_getspb(OPJ_UINT32 lu) +static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu) { - return lut_enc_spb[lu]; -} - -static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f) -{ - return lut_spb[(f & (T1_SIG_PRIM | T1_SGN)) >> 4]; + return lut_spb[lu]; } static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos) @@ -338,12 +306,12 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) } -static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, - OPJ_UINT32 s, OPJ_UINT32 stride) +static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, + OPJ_UINT32 s, OPJ_UINT32 stride) { /* set up to point to the north and south data points' flags words, if required */ - opj_flag_enc_t* north; - opj_flag_enc_t* south; + opj_flag_t* north; + opj_flag_t* south; /* mark target as significant */ *flagsp |= T1_SIGMA_4 << (3U * ci); @@ -395,83 +363,12 @@ static INLINE void opj_t1_enc_updateflags(opj_flag_enc_t *flagsp, OPJ_UINT32 ci, } } -static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, - OPJ_UINT32 stride) -{ - opj_flag_t *np = flagsp - stride; - opj_flag_t *sp = flagsp + stride; - - /* We strongly rely on (T1_SGN_N == 0x0100) == (T1_SIG_N == 0x0010) << 4 */ - /* and T1_SIG_E == T1_SIG_N << 1, T1_SIG_W == T1_SIG_N << 2 and T1_SIG_S == T1_SIG_N << 2 */ - /* and T1_SGN_E == T1_SGN_N << 1, T1_SGN_W == T1_SGN_N << 2 and T1_SGN_S == T1_SGN_N << 2 */ - - unsigned int flag_N = T1_SIG_N | ((unsigned int)T1_SIG_N << (4U * s)); - - np[-1] |= T1_SIG_SE; - np[0] |= (opj_flag_t)(flag_N << 2U); - np[1] |= T1_SIG_SW; - - flagsp[-1] |= (opj_flag_t)(flag_N << 1U); -#ifdef CONSISTENCY_CHECK - flagsp[0] |= T1_SIG; -#endif - flagsp[1] |= (opj_flag_t)(flag_N << 3U); - - sp[-1] |= T1_SIG_NE; - sp[0] = (opj_flag_t)(sp[0] | flag_N); - sp[1] |= T1_SIG_NW; -} - -static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, - opj_colflag_t *colflagsp, 
OPJ_UINT32 s, OPJ_UINT32 stride, OPJ_UINT32 row) -{ - opj_t1_updateflags(flagsp, s, stride); - if (row == 0U) { - *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U)))); - *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << - (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U)))); - *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << - (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U)))); - *(colflagsp - stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3); - *(colflagsp - stride) |= (T1_COLFLAG_SIG_OTHER_ROW_3); - *(colflagsp - stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3); - } else if (row == 3U) { - *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U)))); - *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << - (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U)))); - *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << - (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1)))); - *(colflagsp + stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0); - *(colflagsp + stride) |= (T1_COLFLAG_SIG_OTHER_ROW_0); - *(colflagsp + stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0); - } else { - *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << - (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U)))); - *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U)))); - *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << - (T1_COLFLAG_RBS * row)) | - 
(T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U)))); - } -} - /** Encode significant pass */ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, - opj_flag_enc_t *flagsp, + opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, @@ -490,13 +387,12 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { - OPJ_UINT32 ctxt1 = opj_t1_enc_getctxno_zc(flags >> (ci * 3U), - orient); + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); v = opj_int_abs(*datap) & one ? 1 : 0; #ifdef DEBUG_ENC_SIG fprintf(stderr, " ctxt1=%d\n", ctxt1); #endif - opj_mqc_setcurctx(mqc, ctxt1); /* ESSAI */ + opj_mqc_setcurctx(mqc, ctxt1); if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ opj_mqc_bypass_enc(mqc, v); } else { @@ -505,28 +401,28 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, if (v) { /* Note: using flags instead of *flagsp & vsc_mask result */ /* in slow down. Probably because of register pressure */ - OPJ_UINT32 lu = opj_t1_enc_getctxtno_sc_or_spb_index( + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( *flagsp & vsc_mask, flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, ci); - OPJ_UINT32 ctxt2 = opj_t1_enc_getctxno_sc(lu); + OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); v = *datap < 0 ? 
1 : 0; *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), (OPJ_UINT32)bpno); #ifdef DEBUG_ENC_SIG fprintf(stderr, " ctxt2=%d\n", ctxt2); #endif - opj_mqc_setcurctx(mqc, ctxt2); /* ESSAI */ + opj_mqc_setcurctx(mqc, ctxt2); if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ opj_mqc_bypass_enc(mqc, v); } else { - OPJ_UINT32 spb = opj_t1_enc_getspb(lu); + OPJ_UINT32 spb = opj_t1_getspb(lu); #ifdef DEBUG_ENC_SIG fprintf(stderr, " spb=%d\n", spb); #endif opj_mqc_encode(mqc, v ^ spb); } - opj_t1_enc_updateflags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); } *flagsp |= T1_PI_THIS << (ci * 3U); } @@ -535,107 +431,101 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, - OPJ_UINT32 row) + OPJ_UINT32 ci) { - OPJ_UINT32 v, flag; + OPJ_UINT32 v; opj_raw_t *raw = t1->raw; /* RAW component */ - flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : - (*flagsp); - if ((flag & T1_SIG_OTH) && - !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (T1_COLFLAG_RBS * row)))) { + OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | + T1_CHI_S) << (ci * 3U)) : ~0U; + OPJ_UINT32 const flags = *flagsp & vsc_mask; + + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && + (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { if (opj_raw_decode(raw)) { - v = opj_raw_decode(raw); /* ESSAI */ + v = opj_raw_decode(raw); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); + opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); } -#ifdef CONSISTENCY_CHECK - *flagsp |= T1_VISIT; -#endif - *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_VISIT_ROW_0 << - (T1_COLFLAG_RBS * row))); + *flagsp |= T1_PI_THIS << (ci * 3U); } } static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_UINT32 row, + OPJ_UINT32 ci, OPJ_UINT32 flags_stride) { - OPJ_INT32 v, flag; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ -#ifdef CONSISTENCY_CHECK - assert(((*flagsp & T1_SIG_OTH) && !(*flagsp & (T1_SIG | T1_VISIT))) == - ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | - T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) == - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)))); -#endif - if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | - T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) == - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row))) { - flag = *flagsp; - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); + OPJ_UINT32 const flags = *flagsp; + + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && + (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( + *flagsp, + flagsp[-1], flagsp[1], + ci); + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); + opj_mqc_setcurctx(mqc, ctxt1); if (opj_mqc_decode(mqc)) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); + OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); + OPJ_UINT32 spb = opj_t1_getspb(lu); + opj_mqc_setcurctx(mqc, ctxt2); + v = opj_mqc_decode(mqc) ^ spb; *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, flags_stride, row); + opj_t1_update_flags(flagsp, ci, v, flags_stride); } -#ifdef CONSISTENCY_CHECK - *flagsp |= T1_VISIT; -#endif - *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_VISIT_ROW_0 << - (T1_COLFLAG_RBS * row))); + *flagsp |= T1_PI_THIS << (ci * 3U); } -} /* VSC and BYPASS by Antonin */ +} static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, - OPJ_UINT32 row) + OPJ_UINT32 ci) { - OPJ_UINT32 v, flag; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : - (*flagsp); - if ((flag & T1_SIG_OTH) && - !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (T1_COLFLAG_RBS * row)))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); + OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | + T1_CHI_S) << (ci * 3U)) : ~0U; + OPJ_UINT32 const flags = *flagsp & vsc_mask; + + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && + (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( + *flagsp & vsc_mask, + flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, + ci); + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); + opj_mqc_setcurctx(mqc, ctxt1); if (opj_mqc_decode(mqc)) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag)); - v = (OPJ_UINT32)opj_mqc_decode(mqc) ^ opj_t1_getspb(flag); + OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); + OPJ_UINT32 spb = opj_t1_getspb(lu); + opj_mqc_setcurctx(mqc, ctxt2); + v = opj_mqc_decode(mqc) ^ spb; *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); + opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); } -#ifdef CONSISTENCY_CHECK - *flagsp |= T1_VISIT; -#endif - *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_VISIT_ROW_0 << - (T1_COLFLAG_RBS * row))); + *flagsp |= T1_PI_THIS << (ci * 3U); } -} /* VSC and BYPASS by Antonin */ +} static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 cblksty @@ -643,7 +533,7 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, { OPJ_UINT32 i, k; OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); - opj_flag_enc_t* f = &ENC_FLAGS(0, 0); + opj_flag_t* f = &T1_FLAGS(0, 0); OPJ_UINT32 const extra = t1->flags_stride - t1->w; *nmsedec = 0; @@ -667,7 +557,6 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, t1, f, &t1->data[((k + 0) * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -677,7 +566,6 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, t1, f, &t1->data[((k + 1) * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -687,7 +575,6 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, t1, f, &t1->data[((k + 2) * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -697,7 +584,6 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, t1, f, &t1->data[((k + 3) * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -727,7 +613,6 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, t1, f, &t1->data[(j * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -747,70 +632,60 @@ static void opj_t1_dec_sigpass_raw( { OPJ_INT32 one, half, oneplushalf, vsc; OPJ_UINT32 i, j, k; - opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; + opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; half = one >> 1; oneplushalf = one | half; for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { - opj_colflag_t *colflags2 = colflags1 + i; + opj_flag_t *flagsp2 = flagsp + i; for (j = k; j < k + 4 && j < t1->h; 
++j) { vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; opj_t1_dec_sigpass_step_raw( t1, - &t1->flags[((j + 1) * t1->flags_stride) + i + 1], - colflags2, + flagsp2, &t1->data[(j * t1->w) + i], oneplushalf, vsc, j - k); } } - colflags1 += t1->flags_stride; + flagsp += t1->flags_stride; } -} /* VSC and BYPASS by Antonin */ +} #define opj_t1_dec_sigpass_mqc_internal(t1, bpno, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf; \ OPJ_UINT32 i, j, k; \ OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flags1 = &t1->flags[1]; \ - opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ + opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ one = 1 << bpno; \ half = one >> 1; \ oneplushalf = one | half; \ for (k = 0; k < (h & ~3u); k += 4) { \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flags2 = flags1 + i; \ - opj_colflag_t *colflags2 = colflags1 + i; \ - if( *colflags2 == 0 ) continue; \ - flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0U, flags_stride); \ + opj_flag_t *flagsp2 = flagsp1 + i; \ + if( *flagsp2 == 0 ) continue; \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 0U, flags_stride); \ data2 += w; \ - flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 1U, flags_stride); \ data2 += w; \ - flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 2U, flags_stride); \ data2 += w; \ - flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 3U, flags_stride); \ data2 += w; \ } \ data1 += w << 2; \ - flags1 += flags_stride << 2; \ - colflags1 += 
flags_stride; \ + flagsp1 += flags_stride; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flags2 = flags1 + i; \ - opj_colflag_t *colflags2 = colflags1 + i; \ + opj_flag_t *flagsp2 = flagsp1 + i; \ for (j = k; j < h; ++j) { \ - flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, j - k, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, j - k, flags_stride); \ data2 += w; \ } \ } \ @@ -830,7 +705,6 @@ static void opj_t1_dec_sigpass_mqc_generic( opj_t1_dec_sigpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride); } -/* VSC and BYPASS by Antonin */ static void opj_t1_dec_sigpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno) @@ -838,67 +712,54 @@ static void opj_t1_dec_sigpass_mqc_vsc( OPJ_INT32 one, half, oneplushalf, vsc; OPJ_UINT32 i, j, k; OPJ_INT32 *data1 = t1->data; - opj_flag_t *flags1 = &t1->flags[1]; - opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; + opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); one = 1 << bpno; half = one >> 1; oneplushalf = one | half; - for (k = 0; k < (t1->h & ~(OPJ_UINT32)3); k += 4U) { + for (k = 0; k < (t1->h & ~3U); k += 4U) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - opj_colflag_t *colflags2 = colflags1 + i; - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, - 0U); + opj_flag_t *flagsp2 = flagsp1 + i; + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 0, 0U); data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, - 1U); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 0, 1U); data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, - 2U); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 0, 2U); data2 += t1->w; - flags2 
+= t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 1, - 3U); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 1, 3U); data2 += t1->w; } data1 += t1->w << 2; - flags1 += t1->flags_stride << 2; - colflags1 += t1->flags_stride; + flagsp1 += t1->flags_stride; } for (i = 0; i < t1->w; ++i) { - opj_colflag_t *colflags2 = colflags1 + i; + opj_flag_t *flagsp2 = flagsp1 + i; for (j = k; j < t1->h; ++j) { vsc = (j == t1->h - 1) ? 1 : 0; opj_t1_dec_sigpass_step_mqc_vsc( t1, - &t1->flags[((j + 1) * t1->flags_stride) + i + 1], - colflags2, + flagsp2, &t1->data[(j * t1->w) + i], oneplushalf, vsc, j - k); } } -} /* VSC and BYPASS by Antonin */ +} /** Encode refinement pass step */ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, - opj_flag_enc_t *flagsp, + opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 ci, - OPJ_UINT32 vsc - ) + OPJ_UINT32 vsc) { OPJ_UINT32 v; @@ -911,14 +772,14 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, (*flagsp >> (ci * 3U)); if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { - OPJ_UINT32 ctxt = opj_t1_enc_getctxno_mag(shift_flags); + OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), (OPJ_UINT32)bpno); v = opj_int_abs(*datap) & one ? 
1 : 0; #ifdef DEBUG_ENC_REF fprintf(stderr, " ctxt=%d\n", ctxt); #endif - opj_mqc_setcurctx(mqc, ctxt); /* ESSAI */ + opj_mqc_setcurctx(mqc, ctxt); if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ opj_mqc_bypass_enc(mqc, v); } else { @@ -931,98 +792,79 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, - opj_colflag_t *colflagsp, + opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_UINT32 row) + OPJ_UINT32 ci) { OPJ_INT32 v, t; opj_raw_t *raw = t1->raw; /* RAW component */ - if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (T1_COLFLAG_RBS * row))) == - ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { + if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == + (T1_SIGMA_THIS << (ci * 3U))) { v = (OPJ_INT32)opj_raw_decode(raw); t = v ? poshalf : neghalf; *datap += *datap < 0 ? -t : t; - *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_REFINE_ROW_0 << - (T1_COLFLAG_RBS * row))); + *flagsp |= T1_MU_THIS << (ci * 3U); } -} /* VSC and BYPASS by Antonin */ +} static INLINE void opj_t1_dec_refpass_step_mqc( opj_t1_t *t1, -#ifdef CONSISTENCY_CHECK opj_flag_t *flagsp, -#else - opj_flag_t *flagsp_unused, -#endif - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_UINT32 row) -{ - OPJ_INT32 v, t; - - opj_mqc_t *mqc = t1->mqc; /* MQC component */ -#ifdef CONSISTENCY_CHECK - assert(((*flagsp & (T1_SIG | T1_VISIT)) == T1_SIG) == - ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (T1_COLFLAG_RBS * row))) == ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * - row)))); -#else - (void)flagsp_unused; -#endif - if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (T1_COLFLAG_RBS * row))) == - ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { - OPJ_UINT32 tmp1 = (*colflagsp & (T1_COLFLAG_SIG_OTHER_ROW_0 << - (T1_COLFLAG_RBS * row))) ? 
T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; - OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << - (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2 : tmp1; - opj_mqc_setcurctx(mqc, tmp2); /* ESSAI */ - v = opj_mqc_decode(mqc); - t = v ? poshalf : neghalf; - *datap += *datap < 0 ? -t : t; - *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_REFINE_ROW_0 << - (T1_COLFLAG_RBS * row))); - } -} /* VSC and BYPASS by Antonin */ - -static INLINE void opj_t1_dec_refpass_step_mqc_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, - opj_colflag_t *colflagsp, - OPJ_INT32 *datap, - OPJ_INT32 poshalf, - OPJ_INT32 neghalf, - OPJ_INT32 vsc, - OPJ_UINT32 row) + OPJ_UINT32 ci) { OPJ_UINT32 v; OPJ_INT32 t; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (T1_COLFLAG_RBS * row))) == - ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { - OPJ_UINT32 flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | - T1_SGN_S))) : (*flagsp); - OPJ_UINT32 tmp1 = (flag & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; - OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << - (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2U : tmp1; - opj_mqc_setcurctx(mqc, tmp2); /* ESSAI */ - v = (OPJ_UINT32)opj_mqc_decode(mqc); + if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == + (T1_SIGMA_THIS << (ci * 3U))) { + OPJ_UINT32 ctxt = opj_t1_getctxno_mag(*flagsp >> (ci * 3U)); + opj_mqc_setcurctx(mqc, ctxt); + v = opj_mqc_decode(mqc); t = v ? poshalf : neghalf; *datap += *datap < 0 ? 
-t : t; - *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_REFINE_ROW_0 << - (T1_COLFLAG_RBS * row))); + *flagsp |= T1_MU_THIS << (ci * 3U); } -} /* VSC and BYPASS by Antonin */ +} + +static INLINE void opj_t1_dec_refpass_step_mqc_vsc( + opj_t1_t *t1, + opj_flag_t *flagsp, + OPJ_INT32 *datap, + OPJ_INT32 poshalf, + OPJ_INT32 neghalf, + OPJ_INT32 vsc, + OPJ_UINT32 ci) +{ + OPJ_UINT32 v; + OPJ_INT32 t; + + opj_mqc_t *mqc = t1->mqc; /* MQC component */ + + OPJ_UINT32 const shift_flags = + vsc ? + ((*flagsp >> (ci * 3U)) & ~(T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S)) + : + (*flagsp >> (ci * 3U)); + + + if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { + OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); + opj_mqc_setcurctx(mqc, ctxt); + v = opj_mqc_decode(mqc); + t = v ? poshalf : neghalf; + *datap += *datap < 0 ? -t : t; + *flagsp |= T1_MU_THIS << (ci * 3U); + } +} static void opj_t1_enc_refpass( opj_t1_t *t1, @@ -1033,7 +875,7 @@ static void opj_t1_enc_refpass( { OPJ_UINT32 i, k; const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); - opj_flag_enc_t* f = &ENC_FLAGS(0, 0); + opj_flag_t* f = &T1_FLAGS(0, 0); const OPJ_UINT32 extra = t1->flags_stride - t1->w; *nmsedec = 0; @@ -1139,66 +981,57 @@ static void opj_t1_dec_refpass_raw( { OPJ_INT32 one, poshalf, neghalf; OPJ_UINT32 i, j, k; - opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; + opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? 
-poshalf : -1; for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { - opj_colflag_t *colflags2 = colflags1 + i; + opj_flag_t *flagsp2 = flagsp1 + i; for (j = k; j < k + 4 && j < t1->h; ++j) { opj_t1_dec_refpass_step_raw( t1, - colflags2, + flagsp2, &t1->data[(j * t1->w) + i], poshalf, neghalf, j - k); } } - colflags1 += t1->flags_stride; + flagsp1 += t1->flags_stride; } -} /* VSC and BYPASS by Antonin */ +} #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \ { \ OPJ_INT32 one, poshalf, neghalf; \ OPJ_UINT32 i, j, k; \ OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flags1 = &t1->flags[1]; \ - opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ + opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ one = 1 << bpno; \ poshalf = one >> 1; \ neghalf = bpno > 0 ? -poshalf : -1; \ for (k = 0; k < (h & ~3u); k += 4) { \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flags2 = flags1 + i; \ - opj_colflag_t *colflags2 = colflags1 + i; \ - if( *colflags2 == 0 ) continue; \ - flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 0U); \ + opj_flag_t *flagsp2 = flagsp1 + i; \ + if( *flagsp2 == 0 ) continue; \ + opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 0U); \ data2 += w; \ - flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 1U); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 1U); \ data2 += w; \ - flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 2U); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 2U); \ data2 += w; \ - flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 3U); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 3U); \ data2 += w; \ } \ data1 += w << 2; \ - flags1 += flags_stride << 2; \ - colflags1 += 
flags_stride; \ + flagsp1 += flags_stride; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flags2 = flags1 + i; \ - opj_colflag_t *colflags2 = colflags1 + i; \ + opj_flag_t *flagsp2 = flagsp1 + i; \ for (j = k; j < h; ++j) { \ - flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, j - k); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, j - k); \ data2 += w; \ } \ } \ @@ -1218,7 +1051,6 @@ static void opj_t1_dec_refpass_mqc_generic( opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride); } -/* VSC and BYPASS by Antonin */ static void opj_t1_dec_refpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno) @@ -1227,61 +1059,52 @@ static void opj_t1_dec_refpass_mqc_vsc( OPJ_UINT32 i, j, k; OPJ_INT32 vsc; OPJ_INT32 *data1 = t1->data; - opj_flag_t *flags1 = &t1->flags[1]; - opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; + opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? 
-poshalf : -1; for (k = 0; k < (t1->h & ~(OPJ_UINT32)3U); k += 4U) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - opj_colflag_t *colflags2 = colflags1 + i; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, + opj_flag_t *flagsp2 = flagsp1 + i; + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, 0, 0U); data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, 0, 1U); data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, 0, 2U); data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, 1, 3U); data2 += t1->w; } data1 += t1->w << 2; - flags1 += t1->flags_stride << 2; - colflags1 += t1->flags_stride; + flagsp1 += t1->flags_stride; } for (i = 0; i < t1->w; ++i) { - opj_colflag_t *colflags2 = colflags1 + i; + opj_flag_t *flagsp2 = flagsp1 + i; for (j = k; j < t1->h; ++j) { vsc = (j == t1->h - 1) ? 
1 : 0; opj_t1_dec_refpass_step_mqc_vsc( t1, - &t1->flags[((j + 1) * t1->flags_stride) + i + 1], - colflags2, + flagsp2, &t1->data[(j * t1->w) + i], poshalf, neghalf, vsc, j - k); } } -} /* VSC and BYPASS by Antonin */ +} /** Encode clean-up pass step */ static void opj_t1_enc_clnpass_step( opj_t1_t *t1, - opj_flag_enc_t *flagsp, + opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, @@ -1313,7 +1136,7 @@ static void opj_t1_enc_clnpass_step( for (ci = runlen; ci < lim; ++ci) { OPJ_UINT32 vsc; OPJ_UINT32 vsc_mask; - opj_flag_enc_t flags; + opj_flag_t flags; vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == lim - 1)) ? 1 : 0; vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << @@ -1325,7 +1148,7 @@ static void opj_t1_enc_clnpass_step( } if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { - OPJ_UINT32 ctxt1 = opj_t1_enc_getctxno_zc(flags >> (ci * 3U), orient); + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); #ifdef DEBUG_ENC_CLN printf(" ctxt1=%d\n", ctxt1); #endif @@ -1338,25 +1161,25 @@ static void opj_t1_enc_clnpass_step( LABEL_PARTIAL: /* Note: using flags instead of *flagsp & vsc_mask result */ /* in slow down. Probably because of register pressure */ - lu = opj_t1_enc_getctxtno_sc_or_spb_index( + lu = opj_t1_getctxtno_sc_or_spb_index( *flagsp & vsc_mask, flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, ci); *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), (OPJ_UINT32)bpno); - ctxt2 = opj_t1_enc_getctxno_sc(lu); + ctxt2 = opj_t1_getctxno_sc(lu); #ifdef DEBUG_ENC_CLN printf(" ctxt2=%d\n", ctxt2); #endif opj_mqc_setcurctx(mqc, ctxt2); v = *datap < 0 ? 
1 : 0; - spb = opj_t1_enc_getspb(lu); + spb = opj_t1_getspb(lu); #ifdef DEBUG_ENC_CLN printf(" spb=%d\n", spb); #endif opj_mqc_encode(mqc, v ^ spb); - opj_t1_enc_updateflags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); } } *flagsp &= ~(T1_PI_THIS << (3U * ci)); @@ -1368,126 +1191,116 @@ LABEL_PARTIAL: static void opj_t1_dec_clnpass_step_partial( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_UINT32 row) + OPJ_UINT32 ci) { - OPJ_INT32 v, flag; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = *flagsp; - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( + *flagsp, flagsp[-1], flagsp[1], + ci); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); + v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, - row); -#ifdef CONSISTENCY_CHECK - *flagsp &= ~T1_VISIT; -#endif -} /* VSC and BYPASS by Antonin */ + opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ +} static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_UINT32 row) + OPJ_UINT32 ci) { - OPJ_INT32 v, flag; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ -#ifdef CONSISTENCY_CHECK - assert((!(*flagsp & (T1_SIG | T1_VISIT))) == (!(*colflagsp & (( - T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (4 * row))))); -#endif - if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (4 * row)))) { - flag = *flagsp; - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); + if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, 
*flagsp >> (ci * 3U)); + opj_mqc_setcurctx(mqc, ctxt1); if (opj_mqc_decode(mqc)) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( + *flagsp, flagsp[-1], flagsp[1], + ci); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); + v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, - row); + opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); } } -#ifdef CONSISTENCY_CHECK - *flagsp &= ~T1_VISIT; -#endif -} /* VSC and BYPASS by Antonin */ + /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ +} static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_UINT32 row, + OPJ_UINT32 ci, OPJ_UINT32 flags_stride) { - OPJ_INT32 v; - OPJ_INT32 flag; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = *flagsp; - /*if (!(flag & (T1_SIG | T1_VISIT)))*/ - { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); - if (opj_mqc_decode(mqc)) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); - *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, flags_stride, row); - } + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); + opj_mqc_setcurctx(mqc, ctxt1); + if (opj_mqc_decode(mqc)) { + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( + *flagsp, flagsp[-1], flagsp[1], + ci); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); + v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); + *datap = v ? 
-oneplushalf : oneplushalf; + opj_t1_update_flags(flagsp, ci, v, flags_stride); } - /*flagsp &= ~T1_VISIT;*/ } static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 partial, OPJ_INT32 vsc, - OPJ_UINT32 row) + OPJ_UINT32 ci) { - OPJ_UINT32 v, flag; + OPJ_UINT32 v; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : - (*flagsp); + OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | + T1_CHI_S) << + (ci * 3U)) : ~0U; + OPJ_UINT32 flags = *flagsp & vsc_mask; if (partial) { goto LABEL_PARTIAL; } - if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << - (T1_COLFLAG_RBS * row)))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); + if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); + opj_mqc_setcurctx(mqc, ctxt1); if (opj_mqc_decode(mqc)) { + OPJ_UINT32 lu; LABEL_PARTIAL: - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag)); - v = (OPJ_UINT32)opj_mqc_decode(mqc) ^ opj_t1_getspb(flag); + lu = opj_t1_getctxtno_sc_or_spb_index( + *flagsp & vsc_mask, flagsp[-1] & vsc_mask, + flagsp[1] & vsc_mask, + ci); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); + v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); + opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); } } -#ifdef CONSISTENCY_CHECK - *flagsp &= ~T1_VISIT; -#endif + /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ } -#define MACRO_t1_enc_flags(x,y) t1->enc_flags[((x)*(t1->flags_stride))+(y)] - static void opj_t1_enc_clnpass( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_UINT32 cblksty) { @@ -1513,7 +1326,7 @@ static void opj_t1_enc_clnpass( #ifdef DEBUG_ENC_CLN printf(" i=%d\n", i); #endif - agg = !(ENC_FLAGS(i, k) & agg_mask); + agg = !(T1_FLAGS(i, k) & agg_mask); #ifdef DEBUG_ENC_CLN printf(" agg=%d\n", agg); #endif @@ -1536,9 +1349,8 @@ static void opj_t1_enc_clnpass( } opj_t1_enc_clnpass_step( t1, - &ENC_FLAGS(i, k), + &T1_FLAGS(i, k), &t1->data[((k + runlen) * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -1561,9 +1373,8 @@ static void opj_t1_enc_clnpass( #endif opj_t1_enc_clnpass_step( t1, - &ENC_FLAGS(i, k), + &T1_FLAGS(i, k), &t1->data[((k + runlen) * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -1575,20 +1386,10 @@ static void opj_t1_enc_clnpass( } } - -#define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)] - -#ifdef CONSISTENCY_CHECK -#define REMOVE_VISIT_FLAG(flagptr) *(flagptr) &= (opj_flag_t)~T1_VISIT -#define T1_SIG_OR_VISIT (T1_SIG | T1_VISIT) -#else -#define REMOVE_VISIT_FLAG(flagptr) do {} while(0) -#define T1_SIG_OR_VISIT 0 -#endif - -#define opj_t1_dec_clnpass_internal(consistency_check, t1, bpno, cblksty, w, h, flags_stride) \ +#define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \ { \ - OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \ + OPJ_INT32 one, half, oneplushalf, agg, vsc; \ + OPJ_UINT32 runlen; \ OPJ_UINT32 i, j, k; \ OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \ \ @@ -1598,17 +1399,13 @@ static void opj_t1_enc_clnpass( half = one >> 1; \ oneplushalf = one | half; \ if 
(cblksty & J2K_CCP_CBLKSTY_VSC) { \ - opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ + opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ + const OPJ_UINT32 agg_mask = ~(T1_SIGMA_15 | T1_SIGMA_16 | T1_SIGMA_17 | T1_CHI_5); \ for (k = 0; k < h; k += 4) { \ for (i = 0; i < w; ++i) { \ - opj_colflag_t *colflags2 = colflags1 + i; \ + opj_flag_t *flagsp2 = flagsp1 + i; \ if (k + 3 < h) { \ - agg = !((*colflags2 & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \ - T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \ - T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \ - T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3)) || \ - ((MACRO_t1_flags_internal(1 + k + 3,1 + i,flags_stride) \ - & ((~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG_OTH)))); \ + agg = !(*flagsp2 & agg_mask); \ } else { \ agg = 0; \ } \ @@ -1623,42 +1420,28 @@ static void opj_t1_enc_clnpass( } else { \ runlen = 0; \ } \ - for (j = k + (OPJ_UINT32)runlen; j < k + 4 && j < h; ++j) { \ + for (j = k + runlen; j < k + 4 && j < h; ++j) { \ vsc = (j == k + 3 || j == h - 1) ? 
1 : 0; \ opj_t1_dec_clnpass_step_vsc( \ t1, \ - &t1->flags[((j+1) * flags_stride) + i + 1], \ - colflags2, \ + flagsp2, \ &t1->data[(j * w) + i], \ oneplushalf, \ - agg && (j == k + (OPJ_UINT32)runlen), \ + agg && (j == k + runlen), \ vsc, j - k); \ } \ - *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ + *flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ - colflags1 += flags_stride; \ + flagsp1 += flags_stride; \ } \ } else { \ OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flags1 = &t1->flags[1]; \ - opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ + opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ for (k = 0; k < (h & ~3u); k += 4) { \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flags2 = flags1 + i; \ - opj_colflag_t *colflags2 = colflags1 + i; \ - opj_colflag_t colflags = *colflags2; \ - agg = !(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \ - T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \ - T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \ - T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3 | T1_COLFLAG_SIG_OTHER_ROW_3)); \ - if( consistency_check ) { \ - assert( agg == !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \ - MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \ - MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \ - MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG_OR_VISIT | T1_SIG_OTH)) ); \ - } \ - if (agg) { \ + opj_flag_t *flagsp2 = flagsp1 + i; \ + if (*flagsp2 == 0) { \ opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ if (!opj_mqc_decode(mqc)) { \ continue; \ @@ -1666,68 +1449,51 @@ static void opj_t1_enc_clnpass( opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ runlen = opj_mqc_decode(mqc); \ runlen = (runlen << 1) | opj_mqc_decode(mqc); \ - flags2 += (OPJ_UINT32)runlen * flags_stride; 
\ - data2 += (OPJ_UINT32)runlen * w; \ - for (j = (OPJ_UINT32)runlen; j < 4; ++j) { \ - flags2 += flags_stride; \ - if (j == (OPJ_UINT32)runlen) { \ - opj_t1_dec_clnpass_step_partial(t1, flags2, colflags2, data2, oneplushalf, j); \ + data2 += runlen * w; \ + for (j = runlen; j < 4; ++j) { \ + if (j == runlen) { \ + opj_t1_dec_clnpass_step_partial(t1, flagsp2, data2, oneplushalf, j); \ } else { \ - opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j); \ + opj_t1_dec_clnpass_step(t1, flagsp2, data2, oneplushalf, j); \ } \ data2 += w; \ } \ } else { \ - flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ - if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0U, flags_stride); \ + if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (0 * 3U)))) {\ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 0U, flags_stride); \ } \ - if( consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ - flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ - if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \ + if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (1 * 3U)))) {\ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 1U, flags_stride); \ } \ - if( consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ - flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ - if (!(colflags & (T1_COLFLAG_SIG_ROW_2 
| T1_COLFLAG_VISIT_ROW_2))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \ + if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (2 * 3U)))) {\ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 2U, flags_stride); \ } \ - if( consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ - flags2 += flags_stride; \ - if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG_OR_VISIT))) ); } \ - if (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \ + if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (3 * 3U)))) {\ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 3U, flags_stride); \ } \ - if( consistency_check ) REMOVE_VISIT_FLAG(*flags2); \ data2 += w; \ } \ - *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ + *flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ data1 += w << 2; \ - flags1 += flags_stride << 2; \ - colflags1 += flags_stride; \ + flagsp1 += flags_stride; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flags2 = flags1 + i; \ - opj_colflag_t *colflags2 = colflags1 + i; \ + opj_flag_t *flagsp2 = flagsp1 + i; \ for (j = k; j < h; ++j) { \ - flags2 += flags_stride; \ - opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j - k); \ + opj_t1_dec_clnpass_step(t1, flagsp2, data2, oneplushalf, j - k); \ data2 += w; \ } \ - *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ + *flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ } \ \ if (segsym) { \ - OPJ_INT32 v = 0; \ + OPJ_UINT32 v = 0; \ 
opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ v = opj_mqc_decode(mqc); \ v = (v << 1) | opj_mqc_decode(mqc); \ @@ -1739,18 +1505,14 @@ static void opj_t1_enc_clnpass( } \ */ \ } \ -} /* VSC and BYPASS by Antonin */ +} static void opj_t1_dec_clnpass_64x64( opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 cblksty) { -#ifdef CONSISTENCY_CHECK - opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, 64, 64, 66); -#else - opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, 64, 64, 66); -#endif + opj_t1_dec_clnpass_internal(t1, bpno, cblksty, 64, 64, 66); } static void opj_t1_dec_clnpass_generic( @@ -1758,13 +1520,8 @@ static void opj_t1_dec_clnpass_generic( OPJ_INT32 bpno, OPJ_INT32 cblksty) { -#ifdef CONSISTENCY_CHECK - opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, t1->w, t1->h, + opj_t1_dec_clnpass_internal(t1, bpno, cblksty, t1->w, t1->h, t1->flags_stride); -#else - opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, t1->w, t1->h, - t1->flags_stride); -#endif } @@ -1863,11 +1620,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( return OPJ_FALSE; } #endif - if (!t1->encoder) { - flagssize = (size_t)h + 2U; - } else { - flagssize = (h + 3U) / 4U + 2U; - } + flagssize = (h + 3U) / 4U + 2U; /* Overflow check */ if (flagssize > (SIZE_MAX / (size_t)t1->flags_stride)) { @@ -1875,10 +1628,11 @@ static OPJ_BOOL opj_t1_allocate_buffers( return OPJ_FALSE; } flagssize *= (size_t)t1->flags_stride; - - if (!t1->encoder) { - - size_t colflags_size; + { + /* BIG FAT XXX */ + opj_flag_t* p; + OPJ_UINT32 x; + OPJ_UINT32 flags_height = (h + 3U) / 4U; if (flagssize > (size_t)t1->flagssize) { /* Overflow check */ @@ -1887,7 +1641,8 @@ static OPJ_BOOL opj_t1_allocate_buffers( return OPJ_FALSE; } opj_aligned_free(t1->flags); - t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(opj_flag_t)); + t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof( + opj_flag_t)); if (!t1->flags) { /* FIXME event manager error callback */ return OPJ_FALSE; @@ -1905,79 +1660,13 @@ 
static OPJ_BOOL opj_t1_allocate_buffers( memset(t1->flags, 0, flagssize * sizeof(opj_flag_t)); - colflags_size = ((((size_t)h + 3U) / 4U) + - 2U); /* Can't overflow, h checked against UINT32_MAX - 3U */ - - /* Overflow check */ - if (colflags_size > (SIZE_MAX / (size_t)t1->flags_stride)) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } - colflags_size *= (size_t)t1->flags_stride; - - if (colflags_size > (size_t)t1->colflags_size) { - /* Overflow check */ - if ((size_t)colflags_size > (SIZE_MAX / sizeof(opj_colflag_t))) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } - opj_aligned_free(t1->colflags); - t1->colflags = (opj_colflag_t*) opj_aligned_malloc(colflags_size * sizeof( - opj_colflag_t)); - if (!t1->colflags) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } -#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ - /* TODO remove this if t1->colflags_size type changes to size_t */ - /* Overflow check */ - if (colflags_size > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } -#endif - t1->colflags_size = (OPJ_UINT32)colflags_size; - } - memset(t1->colflags, 0, colflags_size * sizeof(opj_colflag_t)); - } else { - /* BIG FAT XXX */ - opj_flag_enc_t* p; - OPJ_UINT32 x; - OPJ_UINT32 flags_height = (h + 3U) / 4U; - - if (flagssize > (size_t)t1->flagssize) { - /* Overflow check */ - if (flagssize > (SIZE_MAX / sizeof(opj_flag_enc_t))) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } - opj_aligned_free(t1->enc_flags); - t1->enc_flags = (opj_flag_enc_t*) opj_aligned_malloc(flagssize * sizeof( - opj_flag_enc_t)); - if (!t1->enc_flags) { - /* FIXME event manager error callback */ - return OPJ_FALSE; - } -#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ - /* TODO remove this if t1->flagssize type changes to size_t */ - /* Overflow check */ - if (flagssize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { - /* FIXME event manager error callback */ - return 
OPJ_FALSE; - } -#endif - } - t1->flagssize = (OPJ_UINT32)flagssize; - - memset(t1->enc_flags, 0, flagssize * sizeof(opj_flag_enc_t)); - - p = &t1->enc_flags[0]; + p = &t1->flags[0]; for (x = 0; x < t1->flags_stride; ++x) { /* magic value to hopefully stop any passes being interested in this entry */ *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); } - p = &t1->enc_flags[((flags_height + 1) * t1->flags_stride)]; + p = &t1->flags[((flags_height + 1) * t1->flags_stride)]; for (x = 0; x < t1->flags_stride; ++x) { /* magic value to hopefully stop any passes being interested in this entry */ *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); @@ -1985,7 +1674,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( if (h % 4) { OPJ_UINT32 v = 0; - p = &t1->enc_flags[((flags_height) * t1->flags_stride)]; + p = &t1->flags[((flags_height) * t1->flags_stride)]; if (h % 4 == 1) { v |= T1_PI_1 | T1_PI_2 | T1_PI_3; } else if (h % 4 == 2) { @@ -2068,15 +1757,6 @@ void opj_t1_destroy(opj_t1_t *p_t1) p_t1->flags = 00; } - if (p_t1->colflags) { - opj_aligned_free(p_t1->colflags); - p_t1->colflags = 00; - } - - if (p_t1->enc_flags) { - opj_aligned_free(p_t1->enc_flags); - p_t1->enc_flags = 00; - } opj_free(p_t1); } @@ -2266,7 +1946,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, OPJ_UINT32 segno, passno; OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */ - mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; + mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); if (!opj_t1_allocate_buffers( t1, @@ -2523,7 +2203,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, OPJ_BYTE type = T1_TYPE_MQ; OPJ_FLOAT64 tempwmsedec; - mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; + mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); max = 0; for (i = 0; i < t1->w; ++i) { @@ -2553,13 +2233,13 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, switch (passtype) { case 0: - opj_t1_enc_sigpass(t1, bpno, orient, &nmsedec, type, cblksty); + opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty); break; 
case 1: opj_t1_enc_refpass(t1, bpno, &nmsedec, type, cblksty); break; case 2: - opj_t1_enc_clnpass(t1, bpno, orient, &nmsedec, cblksty); + opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty); /* code switch SEGMARK (i.e. SEGSYM) */ if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) { opj_mqc_segmark_enc(mqc); diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 59c2d016..7a9b3e9a 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -52,33 +52,6 @@ in T1.C are used by some function in TCD.C. /* ----------------------------------------------------------------------- */ #define T1_NMSEDEC_BITS 7 -/* CAUTION: the value of those constants must not be changed, otherwise the */ -/* optimization of opj_t1_updateflags() will break! */ -/* BEGINNING of flags that apply to opj_flag_t */ -#define T1_SIG_NE 0x0001U /**< Context orientation : North-East direction */ -#define T1_SIG_SE 0x0002U /**< Context orientation : South-East direction */ -#define T1_SIG_SW 0x0004U /**< Context orientation : South-West direction */ -#define T1_SIG_NW 0x0008U /**< Context orientation : North-West direction */ -#define T1_SIG_N 0x0010U /**< Context orientation : North direction */ -#define T1_SIG_E 0x0020U /**< Context orientation : East direction */ -#define T1_SIG_S 0x0040U /**< Context orientation : South direction */ -#define T1_SIG_W 0x0080U /**< Context orientation : West direction */ -#define T1_SIG_OTH (T1_SIG_N|T1_SIG_NE|T1_SIG_E|T1_SIG_SE|T1_SIG_S|T1_SIG_SW|T1_SIG_W|T1_SIG_NW) -#define T1_SIG_PRIM (T1_SIG_N|T1_SIG_E|T1_SIG_S|T1_SIG_W) - -#define T1_SGN_N 0x0100U -#define T1_SGN_E 0x0200U -#define T1_SGN_S 0x0400U -#define T1_SGN_W 0x0800U -#define T1_SGN (T1_SGN_N|T1_SGN_E|T1_SGN_S|T1_SGN_W) - -#ifdef CONSISTENCY_CHECK -#define T1_SIG 0x1000U /**< No longer used by decoder */ -#define T1_VISIT 0x4000U /**< No longer used by decoder */ -#endif - -/* END of flags that apply to opj_flag_t */ - #define T1_NUMCTXS_ZC 9 #define T1_NUMCTXS_SC 5 #define T1_NUMCTXS_MAG 3 @@ -97,27 +70,7 
@@ in T1.C are used by some function in TCD.C. #define T1_TYPE_MQ 0 /**< Normal coding using entropy coder */ #define T1_TYPE_RAW 1 /**< No encoding the information is store under raw format in codestream (mode switch RAW)*/ -/* Those flags are used by opj_colflag_t */ -#define T1_COLFLAG_RBS 4U /* RBS = Row Bit Shift */ -#define T1_COLFLAG_SIG_OTHER_ROW_0 (1U << 0U) /**< This sample has at least one significant neighbour */ -#define T1_COLFLAG_SIG_ROW_0 (1U << 1U) /**< This sample is significant */ -#define T1_COLFLAG_VISIT_ROW_0 (1U << 2U) /**< This sample has been visited */ -#define T1_COLFLAG_REFINE_ROW_0 (1U << 3U) /**< This sample has been refined */ -#define T1_COLFLAG_SIG_OTHER_ROW_1 (T1_COLFLAG_SIG_OTHER_ROW_0 << (1U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_SIG_ROW_1 (T1_COLFLAG_SIG_ROW_0 << (1U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_VISIT_ROW_1 (T1_COLFLAG_VISIT_ROW_0 << (1U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_REFINE_ROW_1 (T1_COLFLAG_REFINE_ROW_0 << (1U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_SIG_OTHER_ROW_2 (T1_COLFLAG_SIG_OTHER_ROW_0 << (2U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_SIG_ROW_2 (T1_COLFLAG_SIG_ROW_0 << (2U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_VISIT_ROW_2 (T1_COLFLAG_VISIT_ROW_0 << (2U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_REFINE_ROW_2 (T1_COLFLAG_REFINE_ROW_0 << (2U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_SIG_OTHER_ROW_3 (T1_COLFLAG_SIG_OTHER_ROW_0 << (3U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_SIG_ROW_3 (T1_COLFLAG_SIG_ROW_0 << (3U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3U * T1_COLFLAG_RBS)) -#define T1_COLFLAG_REFINE_ROW_3 (T1_COLFLAG_REFINE_ROW_0 << (3U * T1_COLFLAG_RBS)) - - -/* BEGINNING of flags that apply to opj_flag_enc_t */ +/* BEGINNING of flags that apply to opj_flag_t */ /** We hold the state of individual data points for the T1 encoder using * a single 32-bit flags word to hold the state of 4 data points. This corresponds * to the 4-point-high columns that the data is processed in. 
@@ -217,16 +170,12 @@ in T1.C are used by some function in TCD.C. #define T1_LUT_SIG_E (1U << 5) #define T1_LUT_SGN_S (1U << 6) #define T1_LUT_SIG_S (1U << 7) -/* END of flags that apply to opj_flag_enc_t */ +/* END of flags that apply to opj_flag_t */ /* ----------------------------------------------------------------------- */ -typedef OPJ_UINT16 opj_flag_t; - /** Flags for 4 consecutive rows of a column */ -typedef OPJ_UINT16 opj_colflag_t; - -typedef OPJ_UINT32 opj_flag_enc_t; +typedef OPJ_UINT32 opj_flag_t; /** Tier-1 coding (coding of code-block coefficients) @@ -239,27 +188,22 @@ typedef struct opj_t1 { opj_raw_t *raw; OPJ_INT32 *data; - /** Flags used by decoder */ - opj_flag_t *flags; - /** Addition flag array such that colflags[1+0] is for state of col=0,row=0..3, - colflags[1+1] for col=1, row=0..3, colflags[1+flags_stride] for col=0,row=4..7, ... + /** Flags used by decoder and encoder. + * Such that flags[1+0] is for state of col=0,row=0..3, + flags[1+1] for col=1, row=0..3, flags[1+flags_stride] for col=0,row=4..7, ... This array avoids too much cache trashing when processing by 4 vertical samples as done in the various decoding steps. 
*/ - opj_colflag_t* colflags; - /** Flags used by encoder */ - opj_flag_enc_t *enc_flags; + opj_flag_t *flags; + OPJ_UINT32 w; OPJ_UINT32 h; OPJ_UINT32 datasize; OPJ_UINT32 flagssize; OPJ_UINT32 flags_stride; - OPJ_UINT32 colflags_size; OPJ_UINT32 data_stride; OPJ_BOOL encoder; } opj_t1_t; -#define MACRO_t1_flags(x,y) t1->flags[((x)*(t1->flags_stride))+(y)] - /** @name Exported functions */ /*@{*/ /* ----------------------------------------------------------------------- */ diff --git a/src/lib/openjp2/t1_generate_luts.c b/src/lib/openjp2/t1_generate_luts.c index be0243c3..a4ce1e17 100644 --- a/src/lib/openjp2/t1_generate_luts.c +++ b/src/lib/openjp2/t1_generate_luts.c @@ -39,87 +39,7 @@ #include "opj_includes.h" -static int t1_init_ctxno_zc(unsigned int f, unsigned int orient) -{ - int h, v, d, n, t, hv; - h = ((f & T1_SIG_W) != 0) + ((f & T1_SIG_E) != 0); - v = ((f & T1_SIG_N) != 0) + ((f & T1_SIG_S) != 0); - d = ((f & T1_SIG_NW) != 0) + ((f & T1_SIG_NE) != 0) + (( - f & T1_SIG_SE) != 0) + ((f & T1_SIG_SW) != 0); - n = 0; - t = 0; - hv = 0; - - switch (orient) { - case 2: - t = h; - h = v; - v = t; - /* fall through */ - case 0: - case 1: - if (!h) { - if (!v) { - if (!d) { - n = 0; - } else if (d == 1) { - n = 1; - } else { - n = 2; - } - } else if (v == 1) { - n = 3; - } else { - n = 4; - } - } else if (h == 1) { - if (!v) { - if (!d) { - n = 5; - } else { - n = 6; - } - } else { - n = 7; - } - } else { - n = 8; - } - break; - case 3: - hv = h + v; - if (!d) { - if (!hv) { - n = 0; - } else if (hv == 1) { - n = 1; - } else { - n = 2; - } - } else if (d == 1) { - if (!hv) { - n = 3; - } else if (hv == 1) { - n = 4; - } else { - n = 5; - } - } else if (d == 2) { - if (!hv) { - n = 6; - } else { - n = 7; - } - } else { - n = 8; - } - break; - } - - return (T1_CTXNO_ZC + n); -} - -static int t1_init_enc_ctxno_zc(int f, int orient) +static int t1_init_ctxno_zc(int f, int orient) { int h, v, d, n, t, hv; n = 0; @@ -196,51 +116,7 @@ static int t1_init_enc_ctxno_zc(int 
f, int orient) return (T1_CTXNO_ZC + n); } -static int t1_init_ctxno_sc(unsigned int f) -{ - int hc, vc, n; - n = 0; - - hc = opj_int_min(((f & (T1_SIG_E | T1_SGN_E)) == - T1_SIG_E) + ((f & (T1_SIG_W | T1_SGN_W)) == T1_SIG_W), - 1) - opj_int_min(((f & (T1_SIG_E | T1_SGN_E)) == - (T1_SIG_E | T1_SGN_E)) + - ((f & (T1_SIG_W | T1_SGN_W)) == - (T1_SIG_W | T1_SGN_W)), 1); - - vc = opj_int_min(((f & (T1_SIG_N | T1_SGN_N)) == - T1_SIG_N) + ((f & (T1_SIG_S | T1_SGN_S)) == T1_SIG_S), - 1) - opj_int_min(((f & (T1_SIG_N | T1_SGN_N)) == - (T1_SIG_N | T1_SGN_N)) + - ((f & (T1_SIG_S | T1_SGN_S)) == - (T1_SIG_S | T1_SGN_S)), 1); - - if (hc < 0) { - hc = -hc; - vc = -vc; - } - if (!hc) { - if (vc == -1) { - n = 1; - } else if (!vc) { - n = 0; - } else { - n = 1; - } - } else if (hc == 1) { - if (vc == -1) { - n = 2; - } else if (!vc) { - n = 3; - } else { - n = 4; - } - } - - return (T1_CTXNO_SC + n); -} - -static int t1_init_enc_ctxno_sc(int f) +static int t1_init_ctxno_sc(int f) { int hc, vc, n; n = 0; @@ -284,34 +160,7 @@ static int t1_init_enc_ctxno_sc(int f) return (T1_CTXNO_SC + n); } -static int t1_init_spb(unsigned int f) -{ - int hc, vc, n; - - hc = opj_int_min(((f & (T1_SIG_E | T1_SGN_E)) == - T1_SIG_E) + ((f & (T1_SIG_W | T1_SGN_W)) == T1_SIG_W), - 1) - opj_int_min(((f & (T1_SIG_E | T1_SGN_E)) == - (T1_SIG_E | T1_SGN_E)) + - ((f & (T1_SIG_W | T1_SGN_W)) == - (T1_SIG_W | T1_SGN_W)), 1); - - vc = opj_int_min(((f & (T1_SIG_N | T1_SGN_N)) == - T1_SIG_N) + ((f & (T1_SIG_S | T1_SGN_S)) == T1_SIG_S), - 1) - opj_int_min(((f & (T1_SIG_N | T1_SGN_N)) == - (T1_SIG_N | T1_SGN_N)) + - ((f & (T1_SIG_S | T1_SGN_S)) == - (T1_SIG_S | T1_SGN_S)), 1); - - if (!hc && !vc) { - n = 0; - } else { - n = (!(hc > 0 || (!hc && vc > 0))); - } - - return n; -} - -static int t1_init_enc_spb(int f) +static int t1_init_spb(int f) { int hc, vc, n; @@ -358,8 +207,7 @@ int main(int argc, char **argv) unsigned int i, j; double u, v, t; - int lut_ctxno_zc[1024]; - int lut_enc_ctxno_zc[2048]; + int 
lut_ctxno_zc[2048]; int lut_nmsedec_sig[1 << T1_NMSEDEC_BITS]; int lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS]; int lut_nmsedec_ref[1 << T1_NMSEDEC_BITS]; @@ -370,30 +218,6 @@ int main(int argc, char **argv) printf("/* This file was automatically generated by t1_generate_luts.c */\n\n"); /* lut_ctxno_zc */ - for (j = 0U; j < 4U; ++j) { - for (i = 0U; i < 256U; ++i) { - unsigned int orient = j; - if (orient == 2U) { - orient = 1U; - } else if (orient == 1U) { - orient = 2U; - } - lut_ctxno_zc[(orient << 8) | i] = t1_init_ctxno_zc(i, j); - } - } - - printf("static const OPJ_BYTE lut_ctxno_zc[1024] = {\n "); - for (i = 0U; i < 1023U; ++i) { - printf("%i,", lut_ctxno_zc[i]); - if (!((i + 1U) & 0x1fU)) { - printf("\n "); - } else { - printf(" "); - } - } - printf("%i\n};\n\n", lut_ctxno_zc[1023]); - - /* lut_enc_ctxno_zc */ for (j = 0; j < 4; ++j) { for (i = 0; i < 512; ++i) { int orient = j; @@ -402,68 +226,44 @@ int main(int argc, char **argv) } else if (orient == 1) { orient = 2; } - lut_enc_ctxno_zc[(orient << 9) | i] = t1_init_enc_ctxno_zc(i, j); + lut_ctxno_zc[(orient << 9) | i] = t1_init_ctxno_zc(i, j); } } - printf("static const OPJ_BYTE lut_enc_ctxno_zc[2048] = {\n "); + printf("static const OPJ_BYTE lut_ctxno_zc[2048] = {\n "); for (i = 0; i < 2047; ++i) { - printf("%i,", lut_enc_ctxno_zc[i]); + printf("%i,", lut_ctxno_zc[i]); if (!((i + 1) & 0x1f)) { printf("\n "); } else { printf(" "); } } - printf("%i\n};\n\n", lut_enc_ctxno_zc[2047]); + printf("%i\n};\n\n", lut_ctxno_zc[2047]); /* lut_ctxno_sc */ printf("static const OPJ_BYTE lut_ctxno_sc[256] = {\n "); - for (i = 0U; i < 255U; ++i) { - printf("0x%x,", t1_init_ctxno_sc(i << 4)); - if (!((i + 1U) & 0xfU)) { - printf("\n "); - } else { - printf(" "); - } - } - printf("0x%x\n};\n\n", t1_init_ctxno_sc(255U << 4)); - - /* lut_enc_ctxno_sc */ - printf("static const OPJ_BYTE lut_enc_ctxno_sc[256] = {\n "); for (i = 0; i < 255; ++i) { - printf("0x%x,", t1_init_enc_ctxno_sc(i)); + printf("0x%x,", t1_init_ctxno_sc(i)); 
if (!((i + 1) & 0xf)) { printf("\n "); } else { printf(" "); } } - printf("0x%x\n};\n\n", t1_init_enc_ctxno_sc(255)); + printf("0x%x\n};\n\n", t1_init_ctxno_sc(255)); /* lut_spb */ printf("static const OPJ_BYTE lut_spb[256] = {\n "); - for (i = 0U; i < 255U; ++i) { - printf("%i,", t1_init_spb(i << 4)); - if (!((i + 1U) & 0x1fU)) { - printf("\n "); - } else { - printf(" "); - } - } - printf("%i\n};\n\n", t1_init_spb(255U << 4)); - - /* lut_enc_spb */ - printf("static const OPJ_BYTE lut_enc_spb[256] = {\n "); for (i = 0; i < 255; ++i) { - printf("%i,", t1_init_enc_spb(i)); + printf("%i,", t1_init_spb(i)); if (!((i + 1) & 0x1f)) { printf("\n "); } else { printf(" "); } } - printf("%i\n};\n\n", t1_init_enc_spb(255)); + printf("%i\n};\n\n", t1_init_spb(255)); /* FIXME FIXME FIXME */ /* fprintf(stdout,"nmsedec luts:\n"); */ diff --git a/src/lib/openjp2/t1_luts.h b/src/lib/openjp2/t1_luts.h index fe560e2e..1a5e7844 100644 --- a/src/lib/openjp2/t1_luts.h +++ b/src/lib/openjp2/t1_luts.h @@ -1,41 +1,6 @@ /* This file was automatically generated by t1_generate_luts.c */ -static const OPJ_BYTE lut_ctxno_zc[1024] = { - 0, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 0, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 0, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 0, 3, 3, 6, 3, 6, 6, 8, 3, 6, 6, 8, 6, 8, 8, 8, 1, 4, 4, 7, 4, 7, 7, 8, 4, 7, 7, 8, 7, 8, 8, 8, - 1, 4, 4, 7, 4, 7, 7, 8, 4, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, - 1, 4, 4, 7, 4, 7, 7, 8, 4, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, - 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, - 1, 4, 4, 7, 4, 7, 7, 8, 4, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, - 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 
7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, - 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, - 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8 -}; - -static const OPJ_BYTE lut_enc_ctxno_zc[2048] = { +static const OPJ_BYTE lut_ctxno_zc[2048] = { 0, 1, 3, 3, 1, 2, 3, 3, 5, 6, 7, 7, 6, 6, 7, 7, 0, 1, 3, 3, 1, 2, 3, 3, 5, 6, 7, 7, 6, 6, 7, 7, 5, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 5, 6, 7, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, 1, 2, 3, 3, 2, 2, 3, 3, 6, 6, 7, 7, 6, 6, 7, 7, @@ -103,25 +68,6 @@ static const OPJ_BYTE lut_enc_ctxno_zc[2048] = { }; static const OPJ_BYTE lut_ctxno_sc[256] = { - 0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xd, 0xc, 0xd, 0xd, 0xd, 0xd, 0xd, - 0x9, 0xa, 0xc, 0xb, 0xa, 0x9, 0xd, 0xc, 0xc, 0xb, 0xc, 0xb, 0xd, 0xc, 0xd, 0xc, - 0x9, 0xa, 0xc, 0xb, 0xa, 0xa, 0xb, 0xb, 0xc, 0xd, 0x9, 0xa, 0xd, 0xd, 0xa, 0xa, - 0x9, 0xa, 0xc, 0xd, 0xa, 0x9, 0xb, 0xc, 0xc, 0xb, 0x9, 0xa, 0xd, 0xc, 0xa, 0x9, - 0x9, 0xa, 0xc, 0xd, 0xa, 0x9, 0xb, 0xc, 0xc, 0xd, 0xc, 0xd, 0xb, 0xc, 0xb, 0xc, - 0x9, 0xa, 0xc, 0xb, 0xa, 0xa, 0xb, 0xb, 0xc, 0xb, 0xc, 0xb, 0xb, 0xb, 0xb, 0xb, - 0x9, 0xa, 0xc, 0xb, 0xa, 0x9, 0xd, 0xc, 0xc, 0xd, 0x9, 0xa, 0xb, 0xc, 0xa, 0x9, - 0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xb, 0x9, 0xa, 0xb, 0xb, 0xa, 0xa, - 0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xb, 0x9, 0xa, 0xb, 0xb, 0xa, 0xa, - 0x9, 0xa, 0xc, 0xb, 0xa, 0x9, 0xd, 0xc, 0xc, 0xd, 0x9, 0xa, 0xb, 0xc, 0xa, 0x9, - 0x9, 0xa, 0xc, 0xb, 0xa, 0xa, 0xb, 0xb, 0xc, 0xb, 0xc, 0xb, 0xb, 0xb, 0xb, 0xb, - 0x9, 0xa, 0xc, 0xd, 0xa, 0x9, 0xb, 0xc, 0xc, 0xd, 0xc, 0xd, 0xb, 0xc, 0xb, 0xc, - 0x9, 0xa, 0xc, 0xd, 0xa, 0x9, 0xb, 0xc, 0xc, 0xb, 0x9, 0xa, 0xd, 0xc, 0xa, 0x9, - 0x9, 0xa, 0xc, 0xb, 0xa, 0xa, 0xb, 0xb, 0xc, 0xd, 0x9, 0xa, 0xd, 0xd, 0xa, 0xa, - 0x9, 0xa, 0xc, 0xb, 0xa, 0x9, 0xd, 0xc, 0xc, 0xb, 0xc, 0xb, 0xd, 0xc, 0xd, 0xc, - 
0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xd, 0xc, 0xd, 0xd, 0xd, 0xd, 0xd -}; - -static const OPJ_BYTE lut_enc_ctxno_sc[256] = { 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0xc, 0xc, 0xd, 0xb, 0xc, 0xc, 0xd, 0xb, 0x9, 0x9, 0xa, 0xa, 0x9, 0x9, 0xa, 0xa, 0xc, 0xc, 0xb, 0xd, 0xc, 0xc, 0xb, 0xd, 0xc, 0xc, 0xd, 0xd, 0xc, 0xc, 0xb, 0xb, 0xc, 0x9, 0xd, 0xa, 0x9, 0xc, 0xa, 0xb, @@ -141,17 +87,6 @@ static const OPJ_BYTE lut_enc_ctxno_sc[256] = { }; static const OPJ_BYTE lut_spb[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, - 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, - 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -static const OPJ_BYTE lut_enc_spb[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, From aa7a8a4398b3792b4a52fb443d49d317a9a4984f Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 31 May 2017 17:15:27 +0200 Subject: [PATCH 08/19] T1: loop unrolling in dec_sigpass_raw and dec_refpass_raw --- src/lib/openjp2/t1.c | 136 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 119 insertions(+), 17 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c 
index bfff8503..44855f22 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -630,28 +630,91 @@ static void opj_t1_dec_sigpass_raw( OPJ_INT32 bpno, OPJ_INT32 cblksty) { - OPJ_INT32 one, half, oneplushalf, vsc; + OPJ_INT32 one, half, oneplushalf; OPJ_UINT32 i, j, k; opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; half = one >> 1; oneplushalf = one | half; - for (k = 0; k < t1->h; k += 4) { - for (i = 0; i < t1->w; ++i) { - opj_flag_t *flagsp2 = flagsp + i; - for (j = k; j < k + 4 && j < t1->h; ++j) { - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || - j == t1->h - 1)) ? 1 : 0; + if ((cblksty & J2K_CCP_CBLKSTY_VSC)) { + OPJ_INT32 *data1 = t1->data; + for (k = 0; k < t1->h; k += 4) { + for (i = 0; i < t1->w; ++i) { + OPJ_INT32* data2 = data1 + i; + opj_flag_t *flagsp2 = flagsp + i; + for (j = k; j < k + 4 && j < t1->h; ++j) { + OPJ_INT32 vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp2, + data2, + oneplushalf, + vsc, + j - k); + data2 += t1->w; + } + } + data1 += t1->w << 2; + flagsp += t1->flags_stride; + } + } else { + OPJ_INT32 *data1 = t1->data; + for (k = 0; k < (t1->h & ~3U); k += 4) { + for (i = 0; i < t1->w; ++i) { + OPJ_INT32* data2 = data1 + i; + opj_flag_t *flagsp2 = flagsp + i; opj_t1_dec_sigpass_step_raw( t1, flagsp2, - &t1->data[(j * t1->w) + i], + data2, oneplushalf, - vsc, - j - k); + 0, /* vsc */ + 0U); + data2 += t1->w; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp2, + data2, + oneplushalf, + 0, /* vsc */ + 1U); + data2 += t1->w; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp2, + data2, + oneplushalf, + 0, /* vsc */ + 2U); + data2 += t1->w; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp2, + data2, + oneplushalf, + 0, /* vsc */ + 3U); + data2 += t1->w; + } + data1 += t1->w << 2; + flagsp += t1->flags_stride; + } + if (k < t1->h) { + for (i = 0; i < t1->w; ++i) { + OPJ_INT32* data2 = data1 + i; + opj_flag_t *flagsp2 = flagsp + i; + for (j = 0; j < t1->h - k; ++j) { + 
opj_t1_dec_sigpass_step_raw( + t1, + flagsp2, + data2, + oneplushalf, + 0, /* vsc */ + j); + data2 += t1->w; + } } } - flagsp += t1->flags_stride; } } @@ -798,13 +861,14 @@ static INLINE void opj_t1_dec_refpass_step_raw( OPJ_INT32 neghalf, OPJ_UINT32 ci) { - OPJ_INT32 v, t; + OPJ_UINT32 v; + OPJ_INT32 t; opj_raw_t *raw = t1->raw; /* RAW component */ if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == (T1_SIGMA_THIS << (ci * 3U))) { - v = (OPJ_INT32)opj_raw_decode(raw); + v = opj_raw_decode(raw); t = v ? poshalf : neghalf; *datap += *datap < 0 ? -t : t; *flagsp |= T1_MU_THIS << (ci * 3U); @@ -981,23 +1045,61 @@ static void opj_t1_dec_refpass_raw( { OPJ_INT32 one, poshalf, neghalf; OPJ_UINT32 i, j, k; + OPJ_INT32 *data1 = t1->data; opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? -poshalf : -1; - for (k = 0; k < t1->h; k += 4) { + for (k = 0; k < (t1->h & ~3U); k += 4) { for (i = 0; i < t1->w; ++i) { + OPJ_INT32 *data2 = data1 + i; opj_flag_t *flagsp2 = flagsp1 + i; - for (j = k; j < k + 4 && j < t1->h; ++j) { + opj_t1_dec_refpass_step_raw( + t1, + flagsp2, + data2, + poshalf, + neghalf, 0U); + data2 += t1->w; + opj_t1_dec_refpass_step_raw( + t1, + flagsp2, + data2, + poshalf, + neghalf, 1U); + data2 += t1->w; + opj_t1_dec_refpass_step_raw( + t1, + flagsp2, + data2, + poshalf, + neghalf, 2U); + data2 += t1->w; + opj_t1_dec_refpass_step_raw( + t1, + flagsp2, + data2, + poshalf, + neghalf, 3U); + data2 += t1->w; + } + data1 += t1->w << 2; + flagsp1 += t1->flags_stride; + } + if (k < t1->h) { + for (i = 0; i < t1->w; ++i) { + OPJ_INT32 *data2 = data1 + i; + opj_flag_t *flagsp2 = flagsp1 + i; + for (j = k; j < t1->h; ++j) { opj_t1_dec_refpass_step_raw( t1, flagsp2, - &t1->data[(j * t1->w) + i], + data2, poshalf, neghalf, j - k); + data2 += t1->w; } } - flagsp1 += t1->flags_stride; } } From 0ec842e1f1e6ebc80df5308cab457372456b204d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 31 May 2017 19:45:03 +0200 
Subject: [PATCH 09/19] Inline opj_raw_decode() --- src/lib/openjp2/raw.c | 22 -------------- src/lib/openjp2/raw.h | 4 ++- src/lib/openjp2/raw_inl.h | 60 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 23 deletions(-) create mode 100644 src/lib/openjp2/raw_inl.h diff --git a/src/lib/openjp2/raw.c b/src/lib/openjp2/raw.c index 516d63fb..51f91492 100644 --- a/src/lib/openjp2/raw.c +++ b/src/lib/openjp2/raw.c @@ -76,25 +76,3 @@ void opj_raw_init_dec(opj_raw_t *raw, OPJ_BYTE *bp, OPJ_UINT32 len) raw->c = 0; raw->ct = 0; } - -OPJ_UINT32 opj_raw_decode(opj_raw_t *raw) -{ - OPJ_UINT32 d; - if (raw->ct == 0) { - raw->ct = 8; - if (raw->len == raw->lenmax) { - raw->c = 0xff; - } else { - if (raw->c == 0xff) { - raw->ct = 7; - } - raw->c = *(raw->start + raw->len); - raw->len++; - } - } - raw->ct--; - d = ((OPJ_UINT32)raw->c >> raw->ct) & 0x01U; - - return d; -} - diff --git a/src/lib/openjp2/raw.h b/src/lib/openjp2/raw.h index 9a32c83d..dfb0ae01 100644 --- a/src/lib/openjp2/raw.h +++ b/src/lib/openjp2/raw.h @@ -66,6 +66,8 @@ typedef struct opj_raw { OPJ_BYTE *end; } opj_raw_t; +#include "raw_inl.h" + /** @name Exported functions */ /*@{*/ /* ----------------------------------------------------------------------- */ @@ -97,7 +99,7 @@ Decode a symbol using raw-decoder. Cfr p.506 TAUBMAN @param raw RAW handle @return Returns the decoded symbol (0 or 1) */ -OPJ_UINT32 opj_raw_decode(opj_raw_t *raw); +static INLINE OPJ_UINT32 opj_raw_decode(opj_raw_t *raw); /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/src/lib/openjp2/raw_inl.h b/src/lib/openjp2/raw_inl.h new file mode 100644 index 00000000..b2ec0591 --- /dev/null +++ b/src/lib/openjp2/raw_inl.h @@ -0,0 +1,60 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. 
This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium + * Copyright (c) 2002-2014, Professor Benoit Macq + * Copyright (c) 2003-2007, Francois-Olivier Devaux + * Copyright (c) 2003-2014, Antonin Descampe + * Copyright (c) 2005, Herve Drolon, FreeImage Team + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** +Decode a symbol using raw-decoder. 
Cfr p.506 TAUBMAN +@param raw RAW handle +@return Returns the decoded symbol (0 or 1) +*/ +static INLINE OPJ_UINT32 opj_raw_decode(opj_raw_t *raw) +{ + OPJ_UINT32 d; + if (raw->ct == 0) { + raw->ct = 8; + if (raw->len == raw->lenmax) { + raw->c = 0xff; + } else { + if (raw->c == 0xff) { + raw->ct = 7; + } + raw->c = *(raw->start + raw->len); + raw->len++; + } + } + raw->ct--; + d = ((OPJ_UINT32)raw->c >> raw->ct) & 0x01U; + + return d; +} From a5003787ffdbc4fb4a9b4e18187e9f69c36b9e9b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 1 Jun 2017 10:23:30 +0200 Subject: [PATCH 10/19] T1: remove flags_stride variable from opj_t1_t --- src/lib/openjp2/t1.c | 185 ++++++++++++++++++++++--------------------- src/lib/openjp2/t1.h | 1 - 2 files changed, 96 insertions(+), 90 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 44855f22..2f3a9692 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -44,7 +44,7 @@ /** @defgroup T1 T1 - Implementation of the tier-1 coding */ /*@{*/ -#define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * t1->flags_stride]) +#define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)]) /** @name Local static functions */ /*@{*/ @@ -422,7 +422,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, #endif opj_mqc_encode(mqc, v ^ spb); } - opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2); } *flagsp |= T1_PI_THIS << (ci * 3U); } @@ -448,7 +448,7 @@ static INLINE void opj_t1_dec_sigpass_step_raw( if (opj_raw_decode(raw)) { v = opj_raw_decode(raw); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2); } *flagsp |= T1_PI_THIS << (ci * 3U); } @@ -517,7 +517,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_mqc_setcurctx(mqc, ctxt2); v = opj_mqc_decode(mqc) ^ spb; *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2); } *flagsp |= T1_PI_THIS << (ci * 3U); } @@ -534,7 +534,7 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_UINT32 i, k; OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); opj_flag_t* f = &T1_FLAGS(0, 0); - OPJ_UINT32 const extra = t1->flags_stride - t1->w; + OPJ_UINT32 const extra = 2; *nmsedec = 0; #ifdef DEBUG_ENC_SIG @@ -641,31 +641,30 @@ static void opj_t1_dec_sigpass_raw( for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { OPJ_INT32* data2 = data1 + i; - opj_flag_t *flagsp2 = flagsp + i; for (j = k; j < k + 4 && j < t1->h; ++j) { OPJ_INT32 vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0; opj_t1_dec_sigpass_step_raw( t1, - flagsp2, + flagsp, data2, oneplushalf, vsc, j - k); data2 += t1->w; } + flagsp ++; } data1 += t1->w << 2; - flagsp += t1->flags_stride; + flagsp += 2; } } else { OPJ_INT32 *data1 = t1->data; for (k = 0; k < (t1->h & ~3U); k += 4) { for (i = 0; i < t1->w; ++i) { OPJ_INT32* data2 = data1 + i; - opj_flag_t *flagsp2 = flagsp + i; opj_t1_dec_sigpass_step_raw( t1, - flagsp2, + flagsp, data2, oneplushalf, 0, /* vsc */ @@ -673,7 +672,7 @@ static void opj_t1_dec_sigpass_raw( data2 += t1->w; opj_t1_dec_sigpass_step_raw( t1, - flagsp2, + flagsp, data2, oneplushalf, 0, /* vsc */ @@ -681,7 +680,7 @@ static void opj_t1_dec_sigpass_raw( data2 += t1->w; opj_t1_dec_sigpass_step_raw( t1, - flagsp2, + flagsp, data2, oneplushalf, 0, /* vsc */ @@ -689,30 +688,31 @@ static void opj_t1_dec_sigpass_raw( data2 += t1->w; opj_t1_dec_sigpass_step_raw( t1, - flagsp2, + flagsp, data2, oneplushalf, 0, /* vsc */ 3U); data2 += t1->w; + flagsp ++; } data1 += t1->w << 2; - flagsp += t1->flags_stride; + flagsp += 2; } if (k < t1->h) { for (i = 0; i < t1->w; ++i) { OPJ_INT32* data2 = data1 + i; - opj_flag_t *flagsp2 = flagsp + i; for (j = 0; j < t1->h - k; ++j) { opj_t1_dec_sigpass_step_raw( t1, - flagsp2, + flagsp, data2, 
oneplushalf, 0, /* vsc */ j); data2 += t1->w; } + flagsp ++; } } } @@ -723,34 +723,35 @@ static void opj_t1_dec_sigpass_raw( OPJ_INT32 one, half, oneplushalf; \ OPJ_UINT32 i, j, k; \ OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ + opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ one = 1 << bpno; \ half = one >> 1; \ oneplushalf = one | half; \ for (k = 0; k < (h & ~3u); k += 4) { \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flagsp2 = flagsp1 + i; \ - if( *flagsp2 == 0 ) continue; \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 0U, flags_stride); \ - data2 += w; \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 1U, flags_stride); \ - data2 += w; \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 2U, flags_stride); \ - data2 += w; \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, 3U, flags_stride); \ - data2 += w; \ + if( *flagsp != 0 ) { \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 0U, flags_stride); \ + data2 += w; \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 1U, flags_stride); \ + data2 += w; \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 2U, flags_stride); \ + data2 += w; \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 3U, flags_stride); \ + data2 += w; \ + } \ + flagsp ++; \ } \ data1 += w << 2; \ - flagsp1 += flags_stride; \ + flagsp += 2; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flagsp2 = flagsp1 + i; \ for (j = k; j < h; ++j) { \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp2, data2, oneplushalf, j - k, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, j - k, flags_stride); \ data2 += w; \ } \ + flagsp ++; \ } \ } @@ -765,7 +766,7 @@ static void opj_t1_dec_sigpass_mqc_generic( opj_t1_t *t1, OPJ_INT32 bpno) { - opj_t1_dec_sigpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride); + 
opj_t1_dec_sigpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2); } static void opj_t1_dec_sigpass_mqc_vsc( @@ -775,38 +776,40 @@ static void opj_t1_dec_sigpass_mqc_vsc( OPJ_INT32 one, half, oneplushalf, vsc; OPJ_UINT32 i, j, k; OPJ_INT32 *data1 = t1->data; - opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); + opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; half = one >> 1; oneplushalf = one | half; for (k = 0; k < (t1->h & ~3U); k += 4U) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flagsp2 = flagsp1 + i; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 0, 0U); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 0, 0U); data2 += t1->w; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 0, 1U); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 0, 1U); data2 += t1->w; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 0, 2U); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 0, 2U); data2 += t1->w; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp2, data2, oneplushalf, 1, 3U); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 1, 3U); data2 += t1->w; + flagsp ++; } data1 += t1->w << 2; - flagsp1 += t1->flags_stride; + flagsp += 2; } for (i = 0; i < t1->w; ++i) { - opj_flag_t *flagsp2 = flagsp1 + i; + OPJ_INT32 *data2 = data1 + i; for (j = k; j < t1->h; ++j) { vsc = (j == t1->h - 1) ? 
1 : 0; opj_t1_dec_sigpass_step_mqc_vsc( t1, - flagsp2, - &t1->data[(j * t1->w) + i], + flagsp, + data2, oneplushalf, vsc, j - k); + data2 += t1->w; } + flagsp ++; } } @@ -940,7 +943,7 @@ static void opj_t1_enc_refpass( OPJ_UINT32 i, k; const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); opj_flag_t* f = &T1_FLAGS(0, 0); - const OPJ_UINT32 extra = t1->flags_stride - t1->w; + const OPJ_UINT32 extra = 2U; *nmsedec = 0; #ifdef DEBUG_ENC_REF @@ -1046,59 +1049,59 @@ static void opj_t1_dec_refpass_raw( OPJ_INT32 one, poshalf, neghalf; OPJ_UINT32 i, j, k; OPJ_INT32 *data1 = t1->data; - opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); + opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? -poshalf : -1; for (k = 0; k < (t1->h & ~3U); k += 4) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flagsp2 = flagsp1 + i; opj_t1_dec_refpass_step_raw( t1, - flagsp2, + flagsp, data2, poshalf, neghalf, 0U); data2 += t1->w; opj_t1_dec_refpass_step_raw( t1, - flagsp2, + flagsp, data2, poshalf, neghalf, 1U); data2 += t1->w; opj_t1_dec_refpass_step_raw( t1, - flagsp2, + flagsp, data2, poshalf, neghalf, 2U); data2 += t1->w; opj_t1_dec_refpass_step_raw( t1, - flagsp2, + flagsp, data2, poshalf, neghalf, 3U); data2 += t1->w; + flagsp ++; } data1 += t1->w << 2; - flagsp1 += t1->flags_stride; + flagsp += 2; } if (k < t1->h) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flagsp2 = flagsp1 + i; for (j = k; j < t1->h; ++j) { opj_t1_dec_refpass_step_raw( t1, - flagsp2, + flagsp, data2, poshalf, neghalf, j - k); data2 += t1->w; } + flagsp ++; } } } @@ -1108,34 +1111,35 @@ static void opj_t1_dec_refpass_raw( OPJ_INT32 one, poshalf, neghalf; \ OPJ_UINT32 i, j, k; \ OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ + opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ one = 1 << bpno; \ poshalf = one >> 1; \ neghalf = bpno > 0 ? 
-poshalf : -1; \ for (k = 0; k < (h & ~3u); k += 4) { \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flagsp2 = flagsp1 + i; \ - if( *flagsp2 == 0 ) continue; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 0U); \ - data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 1U); \ - data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 2U); \ - data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, 3U); \ - data2 += w; \ + if( *flagsp != 0 ) { \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 0U); \ + data2 += w; \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 1U); \ + data2 += w; \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 2U); \ + data2 += w; \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 3U); \ + data2 += w; \ + } \ + flagsp ++; \ } \ data1 += w << 2; \ - flagsp1 += flags_stride; \ + flagsp += 2; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flagsp2 = flagsp1 + i; \ for (j = k; j < h; ++j) { \ - opj_t1_dec_refpass_step_mqc(t1, flagsp2, data2, poshalf, neghalf, j - k); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, j - k); \ data2 += w; \ } \ + flagsp ++; \ } \ } @@ -1150,7 +1154,7 @@ static void opj_t1_dec_refpass_mqc_generic( opj_t1_t *t1, OPJ_INT32 bpno) { - opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride); + opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U); } static void opj_t1_dec_refpass_mqc_vsc( @@ -1161,42 +1165,44 @@ static void opj_t1_dec_refpass_mqc_vsc( OPJ_UINT32 i, j, k; OPJ_INT32 vsc; OPJ_INT32 *data1 = t1->data; - opj_flag_t *flagsp1 = &T1_FLAGS(0, 0); + opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? 
-poshalf : -1; for (k = 0; k < (t1->h & ~(OPJ_UINT32)3U); k += 4U) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flagsp2 = flagsp1 + i; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, 0, 0U); data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, 0, 1U); data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, 0, 2U); data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp2, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, 1, 3U); data2 += t1->w; + flagsp ++; } data1 += t1->w << 2; - flagsp1 += t1->flags_stride; + flagsp += 2U; } for (i = 0; i < t1->w; ++i) { - opj_flag_t *flagsp2 = flagsp1 + i; + OPJ_INT32 *data2 = data1 + i; for (j = k; j < t1->h; ++j) { vsc = (j == t1->h - 1) ? 1 : 0; opj_t1_dec_refpass_step_mqc_vsc( t1, - flagsp2, - &t1->data[(j * t1->w) + i], + flagsp, + data2, poshalf, neghalf, vsc, j - k); + data2 += t1->w; } + flagsp ++; } } @@ -1281,7 +1287,7 @@ LABEL_PARTIAL: printf(" spb=%d\n", spb); #endif opj_mqc_encode(mqc, v ^ spb); - opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); } } *flagsp &= ~(T1_PI_THIS << (3U * ci)); @@ -1306,7 +1312,7 @@ static void opj_t1_dec_clnpass_step_partial( opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ } @@ -1330,7 +1336,7 @@ static void opj_t1_dec_clnpass_step( opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); } } /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ @@ -1394,7 +1400,7 @@ LABEL_PARTIAL: opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->flags_stride); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); } } /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ @@ -1623,7 +1629,7 @@ static void opj_t1_dec_clnpass_generic( OPJ_INT32 cblksty) { opj_t1_dec_clnpass_internal(t1, bpno, cblksty, t1->w, t1->h, - t1->flags_stride); + t1->w + 2U); } @@ -1665,6 +1671,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( OPJ_UINT32 h) { size_t flagssize; + OPJ_UINT32 flags_stride; /* encoder uses tile buffer, so no need to allocate */ if (!t1->encoder) { @@ -1713,7 +1720,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( /* FIXME event manager error callback */ return OPJ_FALSE; } - t1->flags_stride = w + 2U; /* can't be 0U */ + flags_stride = w + 2U; /* can't be 0U */ #if (SIZE_MAX - 3U) < 0xFFFFFFFFU /* UINT32_MAX */ /* Overflow check */ @@ -1725,11 +1732,11 @@ static OPJ_BOOL opj_t1_allocate_buffers( flagssize = (h + 3U) / 4U + 2U; /* Overflow check */ - if (flagssize > (SIZE_MAX / (size_t)t1->flags_stride)) { + if (flagssize > (SIZE_MAX / (size_t)flags_stride)) { /* FIXME event manager error callback */ return OPJ_FALSE; } - flagssize *= (size_t)t1->flags_stride; + flagssize *= (size_t)flags_stride; { /* BIG FAT XXX */ opj_flag_t* p; @@ -1763,20 +1770,20 @@ static OPJ_BOOL opj_t1_allocate_buffers( 
memset(t1->flags, 0, flagssize * sizeof(opj_flag_t)); p = &t1->flags[0]; - for (x = 0; x < t1->flags_stride; ++x) { + for (x = 0; x < flags_stride; ++x) { /* magic value to hopefully stop any passes being interested in this entry */ *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); } - p = &t1->flags[((flags_height + 1) * t1->flags_stride)]; - for (x = 0; x < t1->flags_stride; ++x) { + p = &t1->flags[((flags_height + 1) * flags_stride)]; + for (x = 0; x < flags_stride; ++x) { /* magic value to hopefully stop any passes being interested in this entry */ *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); } if (h % 4) { OPJ_UINT32 v = 0; - p = &t1->flags[((flags_height) * t1->flags_stride)]; + p = &t1->flags[((flags_height) * flags_stride)]; if (h % 4 == 1) { v |= T1_PI_1 | T1_PI_2 | T1_PI_3; } else if (h % 4 == 2) { @@ -1784,7 +1791,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( } else if (h % 4 == 3) { v |= T1_PI_3; } - for (x = 0; x < t1->flags_stride; ++x) { + for (x = 0; x < flags_stride; ++x) { *p++ = v; } } diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 7a9b3e9a..7e1df434 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -199,7 +199,6 @@ typedef struct opj_t1 { OPJ_UINT32 h; OPJ_UINT32 datasize; OPJ_UINT32 flagssize; - OPJ_UINT32 flags_stride; OPJ_UINT32 data_stride; OPJ_BOOL encoder; } opj_t1_t; From 10410fe72ec2d20f2bbdfb68a383bce1df17f35d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 1 Jun 2017 11:15:25 +0200 Subject: [PATCH 11/19] T1: avoid pointer indirection for mqc and raw members of opj_t1_t --- src/lib/openjp2/mqc.c | 13 ----------- src/lib/openjp2/mqc.h | 11 +-------- src/lib/openjp2/raw.c | 13 ----------- src/lib/openjp2/raw.h | 11 +-------- src/lib/openjp2/t1.c | 54 +++++++++++++++---------------------------- src/lib/openjp2/t1.h | 4 ++-- 6 files changed, 22 insertions(+), 84 deletions(-) diff --git a/src/lib/openjp2/mqc.c b/src/lib/openjp2/mqc.c index 8a792b60..bd4054c7 100644 --- a/src/lib/openjp2/mqc.c +++ 
b/src/lib/openjp2/mqc.c @@ -272,19 +272,6 @@ static void opj_mqc_setbits(opj_mqc_t *mqc) ========================================================== */ -opj_mqc_t* opj_mqc_create(void) -{ - opj_mqc_t *mqc = (opj_mqc_t*)opj_malloc(sizeof(opj_mqc_t)); - return mqc; -} - -void opj_mqc_destroy(opj_mqc_t *mqc) -{ - if (mqc) { - opj_free(mqc); - } -} - OPJ_UINT32 opj_mqc_numbytes(opj_mqc_t *mqc) { const ptrdiff_t diff = mqc->bp - mqc->start; diff --git a/src/lib/openjp2/mqc.h b/src/lib/openjp2/mqc.h index 4a3e3c88..eb44cdd6 100644 --- a/src/lib/openjp2/mqc.h +++ b/src/lib/openjp2/mqc.h @@ -86,16 +86,7 @@ typedef struct opj_mqc { /** @name Exported functions */ /*@{*/ /* ----------------------------------------------------------------------- */ -/** -Create a new MQC handle -@return Returns a new MQC handle if successful, returns NULL otherwise -*/ -opj_mqc_t* opj_mqc_create(void); -/** -Destroy a previously created MQC handle -@param mqc MQC handle to destroy -*/ -void opj_mqc_destroy(opj_mqc_t *mqc); + /** Return the number of bytes written/read since initialisation @param mqc MQC handle diff --git a/src/lib/openjp2/raw.c b/src/lib/openjp2/raw.c index 51f91492..03cc90c0 100644 --- a/src/lib/openjp2/raw.c +++ b/src/lib/openjp2/raw.c @@ -48,19 +48,6 @@ ========================================================== */ -opj_raw_t* opj_raw_create(void) -{ - opj_raw_t *raw = (opj_raw_t*)opj_malloc(sizeof(opj_raw_t)); - return raw; -} - -void opj_raw_destroy(opj_raw_t *raw) -{ - if (raw) { - opj_free(raw); - } -} - OPJ_UINT32 opj_raw_numbytes(opj_raw_t *raw) { const ptrdiff_t diff = raw->bp - raw->start; diff --git a/src/lib/openjp2/raw.h b/src/lib/openjp2/raw.h index dfb0ae01..1f55ed5e 100644 --- a/src/lib/openjp2/raw.h +++ b/src/lib/openjp2/raw.h @@ -71,16 +71,7 @@ typedef struct opj_raw { /** @name Exported functions */ /*@{*/ /* ----------------------------------------------------------------------- */ -/** -Create a new RAW handle -@return Returns a new RAW handle if 
successful, returns NULL otherwise -*/ -opj_raw_t* opj_raw_create(void); -/** -Destroy a previously created RAW handle -@param raw RAW handle to destroy -*/ -void opj_raw_destroy(opj_raw_t *raw); + /** Return the number of bytes written/read since initialisation @param raw RAW handle to destroy diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 2f3a9692..b86ecf23 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -379,7 +379,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << (ci * 3U)) : ~0U; @@ -437,7 +437,7 @@ static INLINE void opj_t1_dec_sigpass_step_raw( OPJ_UINT32 ci) { OPJ_UINT32 v; - opj_raw_t *raw = t1->raw; /* RAW component */ + opj_raw_t *raw = &(t1->raw); /* RAW component */ OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << (ci * 3U)) : ~0U; @@ -464,7 +464,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 const flags = *flagsp; if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && @@ -497,7 +497,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << (ci * 3U)) : ~0U; @@ -829,7 +829,7 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 const shift_flags = vsc ? 
@@ -867,7 +867,7 @@ static INLINE void opj_t1_dec_refpass_step_raw( OPJ_UINT32 v; OPJ_INT32 t; - opj_raw_t *raw = t1->raw; /* RAW component */ + opj_raw_t *raw = &(t1->raw); /* RAW component */ if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == (T1_SIGMA_THIS << (ci * 3U))) { @@ -889,7 +889,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc( OPJ_UINT32 v; OPJ_INT32 t; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == (T1_SIGMA_THIS << (ci * 3U))) { @@ -914,7 +914,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( OPJ_UINT32 v; OPJ_INT32 t; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 const shift_flags = vsc ? @@ -1223,7 +1223,7 @@ static void opj_t1_enc_clnpass_step( { OPJ_UINT32 v; OPJ_UINT32 ci; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); @@ -1304,7 +1304,7 @@ static void opj_t1_dec_clnpass_step_partial( OPJ_UINT32 ci) { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( *flagsp, flagsp[-1], flagsp[1], @@ -1325,7 +1325,7 @@ static void opj_t1_dec_clnpass_step( { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); opj_mqc_setcurctx(mqc, ctxt1); @@ -1352,7 +1352,7 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 
3U)); opj_mqc_setcurctx(mqc, ctxt1); @@ -1378,7 +1378,7 @@ static void opj_t1_dec_clnpass_step_vsc( { OPJ_UINT32 v; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << @@ -1416,7 +1416,7 @@ static void opj_t1_enc_clnpass( const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); OPJ_UINT32 agg, runlen; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ const OPJ_UINT32 agg_mask = (cblksty & J2K_CCP_CBLKSTY_VSC) ? ~(T1_SIGMA_15 | T1_SIGMA_16 | T1_SIGMA_17 | T1_CHI_5) : @@ -1501,7 +1501,7 @@ static void opj_t1_enc_clnpass( OPJ_UINT32 i, j, k; \ OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \ \ - opj_mqc_t *mqc = t1->mqc; /* MQC component */ \ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ \ \ one = 1 << bpno; \ half = one >> 1; \ @@ -1820,18 +1820,6 @@ opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder) return 00; } - /* create MQC and RAW handles */ - l_t1->mqc = opj_mqc_create(); - if (! l_t1->mqc) { - opj_t1_destroy(l_t1); - return 00; - } - - l_t1->raw = opj_raw_create(); - if (! 
l_t1->raw) { - opj_t1_destroy(l_t1); - return 00; - } l_t1->encoder = isEncoder; return l_t1; @@ -1849,12 +1837,6 @@ void opj_t1_destroy(opj_t1_t *p_t1) return; } - /* destroy MQC and RAW handles */ - opj_mqc_destroy(p_t1->mqc); - p_t1->mqc = 00; - opj_raw_destroy(p_t1->raw); - p_t1->raw = 00; - /* encoder uses tile buffer, so no need to free */ if (!p_t1->encoder && p_t1->data) { opj_aligned_free(p_t1->data); @@ -2047,8 +2029,8 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, OPJ_UINT32 roishift, OPJ_UINT32 cblksty) { - opj_raw_t *raw = t1->raw; /* RAW component */ - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_raw_t *raw = &(t1->raw); /* RAW component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_INT32 bpno_plus_one; OPJ_UINT32 passtype; @@ -2301,7 +2283,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, { OPJ_FLOAT64 cumwmsedec = 0.0; - opj_mqc_t *mqc = t1->mqc; /* MQC component */ + opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 passno; OPJ_INT32 bpno; diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 7e1df434..a6982ce2 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -183,9 +183,9 @@ Tier-1 coding (coding of code-block coefficients) typedef struct opj_t1 { /** MQC component */ - opj_mqc_t *mqc; + opj_mqc_t mqc; /** RAW component */ - opj_raw_t *raw; + opj_raw_t raw; OPJ_INT32 *data; /** Flags used by decoder and encoder. From a0861855c11bcb8c93f399ca51e766b5c8ef9e33 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 1 Jun 2017 17:02:50 +0200 Subject: [PATCH 12/19] T1: remove use of neghalf variable. 
It is useless since bpno is always > 0 --- src/lib/openjp2/t1.c | 57 ++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index b86ecf23..d5fba6f9 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -135,21 +135,18 @@ static INLINE void opj_t1_dec_refpass_step_raw( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, - OPJ_INT32 neghalf, OPJ_UINT32 row); static INLINE void opj_t1_dec_refpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, - OPJ_INT32 neghalf, OPJ_UINT32 row); static INLINE void opj_t1_dec_refpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, - OPJ_INT32 neghalf, OPJ_INT32 vsc, OPJ_UINT32 row); @@ -861,19 +858,16 @@ static INLINE void opj_t1_dec_refpass_step_raw( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, - OPJ_INT32 neghalf, OPJ_UINT32 ci) { OPJ_UINT32 v; - OPJ_INT32 t; opj_raw_t *raw = &(t1->raw); /* RAW component */ if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == (T1_SIGMA_THIS << (ci * 3U))) { v = opj_raw_decode(raw); - t = v ? poshalf : neghalf; - *datap += *datap < 0 ? -t : t; + *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf; *flagsp |= T1_MU_THIS << (ci * 3U); } } @@ -883,11 +877,9 @@ static INLINE void opj_t1_dec_refpass_step_mqc( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, - OPJ_INT32 neghalf, OPJ_UINT32 ci) { OPJ_UINT32 v; - OPJ_INT32 t; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ @@ -896,8 +888,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc( OPJ_UINT32 ctxt = opj_t1_getctxno_mag(*flagsp >> (ci * 3U)); opj_mqc_setcurctx(mqc, ctxt); v = opj_mqc_decode(mqc); - t = v ? poshalf : neghalf; - *datap += *datap < 0 ? -t : t; + *datap += (v ^ (*datap < 0)) ? 
poshalf : -poshalf; *flagsp |= T1_MU_THIS << (ci * 3U); } } @@ -907,12 +898,10 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, - OPJ_INT32 neghalf, OPJ_INT32 vsc, OPJ_UINT32 ci) { OPJ_UINT32 v; - OPJ_INT32 t; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ @@ -927,8 +916,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); opj_mqc_setcurctx(mqc, ctxt); v = opj_mqc_decode(mqc); - t = v ? poshalf : neghalf; - *datap += *datap < 0 ? -t : t; + *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf; *flagsp |= T1_MU_THIS << (ci * 3U); } } @@ -1046,13 +1034,12 @@ static void opj_t1_dec_refpass_raw( opj_t1_t *t1, OPJ_INT32 bpno) { - OPJ_INT32 one, poshalf, neghalf; + OPJ_INT32 one, poshalf; OPJ_UINT32 i, j, k; OPJ_INT32 *data1 = t1->data; opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; poshalf = one >> 1; - neghalf = bpno > 0 ? -poshalf : -1; for (k = 0; k < (t1->h & ~3U); k += 4) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; @@ -1061,28 +1048,28 @@ static void opj_t1_dec_refpass_raw( flagsp, data2, poshalf, - neghalf, 0U); + 0U); data2 += t1->w; opj_t1_dec_refpass_step_raw( t1, flagsp, data2, poshalf, - neghalf, 1U); + 1U); data2 += t1->w; opj_t1_dec_refpass_step_raw( t1, flagsp, data2, poshalf, - neghalf, 2U); + 2U); data2 += t1->w; opj_t1_dec_refpass_step_raw( t1, flagsp, data2, poshalf, - neghalf, 3U); + 3U); data2 += t1->w; flagsp ++; } @@ -1098,7 +1085,7 @@ static void opj_t1_dec_refpass_raw( flagsp, data2, poshalf, - neghalf, j - k); + j - k); data2 += t1->w; } flagsp ++; @@ -1108,24 +1095,23 @@ static void opj_t1_dec_refpass_raw( #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \ { \ - OPJ_INT32 one, poshalf, neghalf; \ + OPJ_INT32 one, poshalf; \ OPJ_UINT32 i, j, k; \ OPJ_INT32 *data1 = t1->data; \ opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ one = 1 << bpno; \ poshalf = one >> 1; \ - neghalf = 
bpno > 0 ? -poshalf : -1; \ for (k = 0; k < (h & ~3u); k += 4) { \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ if( *flagsp != 0 ) { \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 0U); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 0U); \ data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 1U); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 1U); \ data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 2U); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 2U); \ data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, 3U); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 3U); \ data2 += w; \ } \ flagsp ++; \ @@ -1136,7 +1122,7 @@ static void opj_t1_dec_refpass_raw( for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ for (j = k; j < h; ++j) { \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, neghalf, j - k); \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, j - k); \ data2 += w; \ } \ flagsp ++; \ @@ -1161,27 +1147,26 @@ static void opj_t1_dec_refpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno) { - OPJ_INT32 one, poshalf, neghalf; + OPJ_INT32 one, poshalf; OPJ_UINT32 i, j, k; OPJ_INT32 vsc; OPJ_INT32 *data1 = t1->data; opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; poshalf = one >> 1; - neghalf = bpno > 0 ? 
-poshalf : -1; for (k = 0; k < (t1->h & ~(OPJ_UINT32)3U); k += 4U) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, 0, 0U); data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, 0, 1U); data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, 0, 2U); data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, neghalf, + opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, 1, 3U); data2 += t1->w; flagsp ++; @@ -1197,7 +1182,7 @@ static void opj_t1_dec_refpass_mqc_vsc( t1, flagsp, data2, - poshalf, neghalf, + poshalf, vsc, j - k); data2 += t1->w; From 2ba861c37cb5032d1fa90a7c9298f6e08a0f5413 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 1 Jun 2017 19:42:03 +0200 Subject: [PATCH 13/19] Optimize opj_t1_update_flags() --- src/lib/openjp2/t1.c | 48 +++++++++----------------------------------- src/lib/openjp2/t1.h | 1 + 2 files changed, 11 insertions(+), 38 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index d5fba6f9..eb69939c 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -306,58 +306,30 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, OPJ_UINT32 s, OPJ_UINT32 stride) { - /* set up to point to the north and south data points' flags words, if required */ - opj_flag_t* north; - opj_flag_t* south; + /* east */ + flagsp[-1] |= T1_SIGMA_5 << (3U * ci); /* mark target as significant */ - *flagsp |= T1_SIGMA_4 << (3U * ci); + *flagsp |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); + + /* west */ + flagsp[1] |= T1_SIGMA_3 << (3U * ci); /* north-west, north, north-east */ if (ci 
== 0U) { - north = flagsp - stride; - *north |= T1_SIGMA_16; + opj_flag_t* north = flagsp - stride; + *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; north[-1] |= T1_SIGMA_17; north[1] |= T1_SIGMA_15; } /* south-west, south, south-east */ if (ci == 3U) { - south = flagsp + stride; - *south |= T1_SIGMA_1; + opj_flag_t* south = flagsp + stride; + *south |= (s << T1_CHI_0_I) | T1_SIGMA_1; south[-1] |= T1_SIGMA_2; south[1] |= T1_SIGMA_0; } - - /* east */ - flagsp[-1] |= T1_SIGMA_5 << (3U * ci); - - /* west */ - flagsp[1] |= T1_SIGMA_3 << (3U * ci); - - if (s) { - switch (ci) { - case 0U: { - *flagsp |= T1_CHI_1; - north = flagsp - stride; - *north |= T1_CHI_5; - break; - } - case 1: - *flagsp |= T1_CHI_2; - break; - case 2: - *flagsp |= T1_CHI_3; - break; - case 3: { - *flagsp |= T1_CHI_4; - south = flagsp + stride; - *south |= T1_CHI_0; - break; - } - - } - } } /** diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index a6982ce2..17123265 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -124,6 +124,7 @@ in T1.C are used by some function in TCD.C. 
#define T1_MU_3 (1U << 29) #define T1_PI_3 (1U << 30) #define T1_CHI_5 (1U << 31) +#define T1_CHI_5_I 31 /** As an example, the bits T1_SIGMA_3, T1_SIGMA_4 and T1_SIGMA_5 * indicate the significance state of the west neighbour of data point zero From 7e8b502842075738c9a062a2f06dace3f35dd7cd Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 2 Jun 2017 09:36:25 +0200 Subject: [PATCH 14/19] t1_generate_luts.c: fix compiler warnings --- src/lib/openjp2/t1_generate_luts.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib/openjp2/t1_generate_luts.c b/src/lib/openjp2/t1_generate_luts.c index a4ce1e17..9ad6f200 100644 --- a/src/lib/openjp2/t1_generate_luts.c +++ b/src/lib/openjp2/t1_generate_luts.c @@ -39,7 +39,7 @@ #include "opj_includes.h" -static int t1_init_ctxno_zc(int f, int orient) +static int t1_init_ctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient) { int h, v, d, n, t, hv; n = 0; @@ -116,7 +116,7 @@ static int t1_init_ctxno_zc(int f, int orient) return (T1_CTXNO_ZC + n); } -static int t1_init_ctxno_sc(int f) +static int t1_init_ctxno_sc(OPJ_UINT32 f) { int hc, vc, n; n = 0; @@ -160,7 +160,7 @@ static int t1_init_ctxno_sc(int f) return (T1_CTXNO_SC + n); } -static int t1_init_spb(int f) +static int t1_init_spb(OPJ_UINT32 f) { int hc, vc, n; @@ -220,7 +220,7 @@ int main(int argc, char **argv) /* lut_ctxno_zc */ for (j = 0; j < 4; ++j) { for (i = 0; i < 512; ++i) { - int orient = j; + OPJ_UINT32 orient = j; if (orient == 2) { orient = 1; } else if (orient == 1) { From 3d9940a35b8c566c1fb41db56018c91b511a976a Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 2 Jun 2017 11:52:16 +0200 Subject: [PATCH 15/19] Force inlining of mqc decoding and pass steps through heavy use of macros, so as to get better register allocation --- src/lib/openjp2/mqc.h | 2 +- src/lib/openjp2/mqc_inl.h | 203 ++++++++-------- src/lib/openjp2/t1.c | 485 ++++++++++++++++++++------------------ 3 files changed, 364 insertions(+), 326 deletions(-) diff --git 
a/src/lib/openjp2/mqc.h b/src/lib/openjp2/mqc.h index eb44cdd6..1c0d848b 100644 --- a/src/lib/openjp2/mqc.h +++ b/src/lib/openjp2/mqc.h @@ -191,7 +191,7 @@ Decode a symbol @param mqc MQC handle @return Returns the decoded symbol (0 or 1) */ -static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t * const mqc); +/*static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t * const mqc);*/ /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/src/lib/openjp2/mqc_inl.h b/src/lib/openjp2/mqc_inl.h index 451ce02d..b2c7eef3 100644 --- a/src/lib/openjp2/mqc_inl.h +++ b/src/lib/openjp2/mqc_inl.h @@ -38,126 +38,131 @@ #ifndef __MQC_INL_H #define __MQC_INL_H -/** -FIXME DOC -@param mqc MQC handle -@return -*/ -static INLINE OPJ_UINT32 opj_mqc_mpsexchange(opj_mqc_t *const mqc) -{ - OPJ_UINT32 d; - if (mqc->a < (*mqc->curctx)->qeval) { - d = !((*mqc->curctx)->mps); - *mqc->curctx = (*mqc->curctx)->nlps; - } else { - d = (*mqc->curctx)->mps; - *mqc->curctx = (*mqc->curctx)->nmps; - } - return d; +/* For internal use of opj_mqc_decode_macro() */ +#define opj_mqc_mpsexchange_macro(d, curctx, a) \ +{ \ + if (a < (*curctx)->qeval) { \ + d = !((*curctx)->mps); \ + *curctx = (*curctx)->nlps; \ + } else { \ + d = (*curctx)->mps; \ + *curctx = (*curctx)->nmps; \ + } \ } -/** -FIXME DOC -@param mqc MQC handle -@return -*/ -static INLINE OPJ_UINT32 opj_mqc_lpsexchange(opj_mqc_t *const mqc) -{ - OPJ_UINT32 d; - if (mqc->a < (*mqc->curctx)->qeval) { - mqc->a = (*mqc->curctx)->qeval; - d = (*mqc->curctx)->mps; - *mqc->curctx = (*mqc->curctx)->nmps; - } else { - mqc->a = (*mqc->curctx)->qeval; - d = !((*mqc->curctx)->mps); - *mqc->curctx = (*mqc->curctx)->nlps; - } - - return d; +/* For internal use of opj_mqc_decode_macro() */ +#define opj_mqc_lpsexchange_macro(d, curctx, a) \ +{ \ + if (a < (*curctx)->qeval) { \ + a = (*curctx)->qeval; \ + d = (*curctx)->mps; \ + *curctx = (*curctx)->nmps; \ + } else { \ + a = (*curctx)->qeval; \ + d = !((*curctx)->mps); \ + 
*curctx = (*curctx)->nlps; \ + } \ } +#define opj_mqc_bytein_macro(mqc, c, ct) \ +{ \ + if (mqc->bp != mqc->end) { \ + OPJ_UINT32 l_c; \ + if (mqc->bp + 1 != mqc->end) { \ + l_c = *(mqc->bp + 1); \ + } else { \ + l_c = 0xff; \ + } \ + if (*mqc->bp == 0xff) { \ + if (l_c > 0x8f) { \ + c += 0xff00; \ + ct = 8; \ + } else { \ + mqc->bp++; \ + c += l_c << 9; \ + ct = 7; \ + } \ + } else { \ + mqc->bp++; \ + c += l_c << 8; \ + ct = 8; \ + } \ + } else { \ + c += 0xff00; \ + ct = 8; \ + } \ +} + +/* For internal use of opj_mqc_decode_macro() */ +#define opj_mqc_renormd_macro(mqc, a, c, ct) \ +{ \ + do { \ + if (ct == 0) { \ + opj_mqc_bytein_macro(mqc, c, ct); \ + } \ + a <<= 1; \ + c <<= 1; \ + ct--; \ + } while (a < 0x8000); \ +} + +#define opj_mqc_decode_macro(d, mqc, curctx, a, c, ct) \ +{ \ + /* Implements ISO 15444-1 C.3.2 Decoding a decision (DECODE) */ \ + /* Note: alternate "J.2 - Decoding an MPS or an LPS in the */ \ + /* software-conventions decoder" has been tried, but does not bring any */ \ + /* improvement. 
See https://github.com/uclouvain/openjpeg/issues/921 */ \ + a -= (*curctx)->qeval; \ + if ((c >> 16) < (*curctx)->qeval) { \ + opj_mqc_lpsexchange_macro(d, curctx, a); \ + opj_mqc_renormd_macro(mqc, a, c, ct); \ + } else { \ + c -= (*curctx)->qeval << 16; \ + if ((a & 0x8000) == 0) { \ + opj_mqc_mpsexchange_macro(d, curctx, a); \ + opj_mqc_renormd_macro(mqc, a, c, ct); \ + } else { \ + d = (*curctx)->mps; \ + } \ + } \ +} + +#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ + register opj_mqc_state_t **curctx = mqc->curctx; \ + register OPJ_UINT32 c = mqc->c; \ + register OPJ_UINT32 a = mqc->a; \ + register OPJ_UINT32 ct = mqc->ct + +#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ + mqc->curctx = curctx; \ + mqc->c = c; \ + mqc->a = a; \ + mqc->ct = ct; + /** Input a byte @param mqc MQC handle */ static INLINE void opj_mqc_bytein(opj_mqc_t *const mqc) { - /* Implements ISO 15444-1 C.3.4 Compressed image data input (BYTEIN) */ - /* Note: alternate "J.3 - Inserting a new byte into the C register in the */ - /* software-conventions decoder" has been tried, but does not bring any */ - /* improvement. 
See https://github.com/uclouvain/openjpeg/issues/921 */ - if (mqc->bp != mqc->end) { - OPJ_UINT32 c; - if (mqc->bp + 1 != mqc->end) { - c = *(mqc->bp + 1); - } else { - c = 0xff; - } - if (*mqc->bp == 0xff) { - if (c > 0x8f) { - mqc->c += 0xff00; - mqc->ct = 8; - } else { - mqc->bp++; - mqc->c += c << 9; - mqc->ct = 7; - } - } else { - mqc->bp++; - mqc->c += c << 8; - mqc->ct = 8; - } - } else { - mqc->c += 0xff00; - mqc->ct = 8; - } + opj_mqc_bytein_macro(mqc, mqc->c, mqc->ct); } /** Renormalize mqc->a and mqc->c while decoding @param mqc MQC handle */ -static INLINE void opj_mqc_renormd(opj_mqc_t *const mqc) -{ - do { - if (mqc->ct == 0) { - opj_mqc_bytein(mqc); - } - mqc->a <<= 1; - mqc->c <<= 1; - mqc->ct--; - } while (mqc->a < 0x8000); -} +#define opj_mqc_renormd(mqc) \ + opj_mqc_renormd_macro(mqc, mqc->a, mqc->c, mqc->ct) /** Decode a symbol +@param d OPJ_UINT32 value where to store the decoded symbol @param mqc MQC handle -@return Returns the decoded symbol (0 or 1) +@return Returns the decoded symbol (0 or 1) in d */ -static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t *const mqc) -{ - /* Implements ISO 15444-1 C.3.2 Decoding a decision (DECODE) */ - /* Note: alternate "J.2 - Decoding an MPS or an LPS in the */ - /* software-conventions decoder" has been tried, but does not bring any */ - /* improvement. 
See https://github.com/uclouvain/openjpeg/issues/921 */ - OPJ_UINT32 d; - mqc->a -= (*mqc->curctx)->qeval; - if ((mqc->c >> 16) < (*mqc->curctx)->qeval) { - d = opj_mqc_lpsexchange(mqc); - opj_mqc_renormd(mqc); - } else { - mqc->c -= (*mqc->curctx)->qeval << 16; - if ((mqc->a & 0x8000) == 0) { - d = opj_mqc_mpsexchange(mqc); - opj_mqc_renormd(mqc); - } else { - d = (*mqc->curctx)->mps; - } - } - - return d; -} +#define opj_mqc_decode(d, mqc) \ + opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct) #endif /* __MQC_INL_H */ diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index eb69939c..c2aa0361 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -46,6 +46,8 @@ #define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)]) +#define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] + /** @name Local static functions */ /*@{*/ @@ -154,12 +156,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( /** Decode clean-up pass */ -static void opj_t1_dec_clnpass_step_partial( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 oneplushalf, - OPJ_UINT32 row); + static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, @@ -302,34 +299,39 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; } +#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride ) \ +{ \ + /* east */ \ + flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \ + \ + /* mark target as significant */ \ + flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \ + \ + /* west */ \ + flagsp[1] |= T1_SIGMA_3 << (3U * ci); \ + \ + /* north-west, north, north-east */ \ + if (ci == 0U) { \ + opj_flag_t* north = flagsp - (stride); \ + *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \ + north[-1] |= T1_SIGMA_17; \ + north[1] |= T1_SIGMA_15; \ + } \ + \ + /* south-west, south, south-east */ \ + if (ci == 3U) { \ + opj_flag_t* south = flagsp + (stride); \ + 
*south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \ + south[-1] |= T1_SIGMA_2; \ + south[1] |= T1_SIGMA_0; \ + } \ +} + static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, OPJ_UINT32 s, OPJ_UINT32 stride) { - /* east */ - flagsp[-1] |= T1_SIGMA_5 << (3U * ci); - - /* mark target as significant */ - *flagsp |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); - - /* west */ - flagsp[1] |= T1_SIGMA_3 << (3U * ci); - - /* north-west, north, north-east */ - if (ci == 0U) { - opj_flag_t* north = flagsp - stride; - *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; - north[-1] |= T1_SIGMA_17; - north[1] |= T1_SIGMA_15; - } - - /* south-west, south, south-east */ - if (ci == 3U) { - opj_flag_t* south = flagsp + stride; - *south |= (s << T1_CHI_0_I) | T1_SIGMA_1; - south[-1] |= T1_SIGMA_2; - south[1] |= T1_SIGMA_0; - } + opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride); } /** @@ -423,6 +425,32 @@ static INLINE void opj_t1_dec_sigpass_step_raw( } } +#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \ + data_stride, ci, mqc, curctx, \ + v, a, c, ct, oneplushalf) \ +{ \ + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \ + (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \ + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \ + opj_t1_setcurctx(curctx, ctxt1); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ + if (v) { \ + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ + flags, \ + flagsp[-1], flagsp[1], \ + ci); \ + OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \ + OPJ_UINT32 spb = opj_t1_getspb(lu); \ + opj_t1_setcurctx(curctx, ctxt2); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ + v = v ^ spb; \ + data[ci*data_stride] = v ? 
-oneplushalf : oneplushalf; \ + opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride); \ + } \ + flags |= T1_PI_THIS << (ci * 3U); \ + } \ +} + static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -434,26 +462,9 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( OPJ_UINT32 v; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - OPJ_UINT32 const flags = *flagsp; - - if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && - (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { - OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, - flagsp[-1], flagsp[1], - ci); - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); - opj_mqc_setcurctx(mqc, ctxt1); - if (opj_mqc_decode(mqc)) { - OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); - OPJ_UINT32 spb = opj_t1_getspb(lu); - opj_mqc_setcurctx(mqc, ctxt2); - v = opj_mqc_decode(mqc) ^ spb; - *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, flags_stride); - } - *flagsp |= T1_PI_THIS << (ci * 3U); - } + opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap, + 0, ci, mqc, mqc->curctx, + v, mqc->a, mqc->c, mqc->ct, oneplushalf); } static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( @@ -480,11 +491,13 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( ci); OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); opj_mqc_setcurctx(mqc, ctxt1); - if (opj_mqc_decode(mqc)) { + opj_mqc_decode(v, mqc); + if (v) { OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); OPJ_UINT32 spb = opj_t1_getspb(lu); opj_mqc_setcurctx(mqc, ctxt2); - v = opj_mqc_decode(mqc) ^ spb; + opj_mqc_decode(v, mqc); + v = v ^ spb; *datap = v ? 
-oneplushalf : oneplushalf; opj_t1_update_flags(flagsp, ci, v, t1->w + 2); } @@ -691,36 +704,43 @@ static void opj_t1_dec_sigpass_raw( { \ OPJ_INT32 one, half, oneplushalf; \ OPJ_UINT32 i, j, k; \ - OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ + register OPJ_INT32 *data = t1->data; \ + register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \ + const OPJ_UINT32 l_w = w; \ + opj_mqc_t* mqc = &(t1->mqc); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + register OPJ_UINT32 v; \ one = 1 << bpno; \ half = one >> 1; \ oneplushalf = one | half; \ - for (k = 0; k < (h & ~3u); k += 4) { \ - for (i = 0; i < w; ++i) { \ - OPJ_INT32 *data2 = data1 + i; \ - if( *flagsp != 0 ) { \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 0U, flags_stride); \ - data2 += w; \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 1U, flags_stride); \ - data2 += w; \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 2U, flags_stride); \ - data2 += w; \ - opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 3U, flags_stride); \ - data2 += w; \ + for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + opj_flag_t flags = *flagsp; \ + if( flags != 0 ) { \ + opj_t1_dec_sigpass_step_mqc_macro( \ + flags, flagsp, flags_stride, data, \ + l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc_macro( \ + flags, flagsp, flags_stride, data, \ + l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc_macro( \ + flags, flagsp, flags_stride, data, \ + l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc_macro( \ + flags, flagsp, flags_stride, data, \ + l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf); \ + *flagsp = flags; \ } \ - flagsp ++; \ } \ - data1 += w << 2; \ - flagsp += 2; \ } \ - for (i = 0; i < w; ++i) { \ - OPJ_INT32 *data2 = data1 + i; \ - for (j = k; j < h; ++j) { \ 
- opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, j - k, flags_stride); \ - data2 += w; \ + UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + if( k < h ) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + for (j = 0; j < h - k; ++j) { \ + opj_t1_dec_sigpass_step_mqc(t1, flagsp, \ + data + j * l_w, oneplushalf, j, flags_stride); \ } \ - flagsp ++; \ + } \ } \ } @@ -844,6 +864,19 @@ static INLINE void opj_t1_dec_refpass_step_raw( } } +#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \ + mqc, curctx, v, a, c, ct, poshalf) \ +{ \ + if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \ + (T1_SIGMA_THIS << (ci * 3U))) { \ + OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \ + opj_t1_setcurctx(curctx, ctxt); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ + data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \ + flags |= T1_MU_THIS << (ci * 3U); \ + } \ +} + static INLINE void opj_t1_dec_refpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -854,15 +887,9 @@ static INLINE void opj_t1_dec_refpass_step_mqc( OPJ_UINT32 v; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == - (T1_SIGMA_THIS << (ci * 3U))) { - OPJ_UINT32 ctxt = opj_t1_getctxno_mag(*flagsp >> (ci * 3U)); - opj_mqc_setcurctx(mqc, ctxt); - v = opj_mqc_decode(mqc); - *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf; - *flagsp |= T1_MU_THIS << (ci * 3U); - } + opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci, + mqc, mqc->curctx, v, mqc->a, mqc->c, + mqc->ct, poshalf); } static INLINE void opj_t1_dec_refpass_step_mqc_vsc( @@ -887,7 +914,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); opj_mqc_setcurctx(mqc, ctxt); - v = opj_mqc_decode(mqc); + opj_mqc_decode(v, mqc); *datap += (v ^ (*datap < 0)) ? 
poshalf : -poshalf; *flagsp |= T1_MU_THIS << (ci * 3U); } @@ -1069,35 +1096,41 @@ static void opj_t1_dec_refpass_raw( { \ OPJ_INT32 one, poshalf; \ OPJ_UINT32 i, j, k; \ - OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ + register OPJ_INT32 *data = t1->data; \ + register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ + const OPJ_UINT32 l_w = w; \ + opj_mqc_t* mqc = &(t1->mqc); \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + register OPJ_UINT32 v; \ one = 1 << bpno; \ poshalf = one >> 1; \ - for (k = 0; k < (h & ~3u); k += 4) { \ - for (i = 0; i < w; ++i) { \ - OPJ_INT32 *data2 = data1 + i; \ - if( *flagsp != 0 ) { \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 0U); \ - data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 1U); \ - data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 2U); \ - data2 += w; \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 3U); \ - data2 += w; \ + for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + opj_flag_t flags = *flagsp; \ + if( flags != 0 ) { \ + opj_t1_dec_refpass_step_mqc_macro( \ + flags, data, l_w, 0, \ + mqc, curctx, v, a, c, ct, poshalf); \ + opj_t1_dec_refpass_step_mqc_macro( \ + flags, data, l_w, 1, \ + mqc, curctx, v, a, c, ct, poshalf); \ + opj_t1_dec_refpass_step_mqc_macro( \ + flags, data, l_w, 2, \ + mqc, curctx, v, a, c, ct, poshalf); \ + opj_t1_dec_refpass_step_mqc_macro( \ + flags, data, l_w, 3, \ + mqc, curctx, v, a, c, ct, poshalf); \ + *flagsp = flags; \ } \ - flagsp ++; \ } \ - data1 += w << 2; \ - flagsp += 2; \ } \ - for (i = 0; i < w; ++i) { \ - OPJ_INT32 *data2 = data1 + i; \ - for (j = k; j < h; ++j) { \ - opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, j - k); \ - data2 += w; \ + UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + if( k < h ) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + for (j = 0; j < h - k; ++j) 
{ \ + opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \ } \ - flagsp ++; \ + } \ } \ } @@ -1252,25 +1285,32 @@ LABEL_PARTIAL: } } - -static void opj_t1_dec_clnpass_step_partial( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 oneplushalf, - OPJ_UINT32 ci) -{ - OPJ_UINT32 v; - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, flagsp[-1], flagsp[1], - ci); - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); - *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); - /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ +#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \ + flags, flagsp, flags_stride, data, \ + data_stride, ci, mqc, curctx, \ + v, a, c, ct, oneplushalf) \ +{ \ + if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\ + do { \ + if( !partial ) { \ + OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \ + opj_t1_setcurctx(curctx, ctxt1); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ + if( !v ) \ + break; \ + } \ + { \ + OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ + flags, flagsp[-1], flagsp[1], \ + ci); \ + opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ + v = v ^ opj_t1_getspb(lu); \ + data[ci*data_stride] = v ? 
-oneplushalf : oneplushalf; \ + opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride); \ + } \ + } while(0); \ + } \ } static void opj_t1_dec_clnpass_step( @@ -1283,45 +1323,10 @@ static void opj_t1_dec_clnpass_step( OPJ_UINT32 v; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); - opj_mqc_setcurctx(mqc, ctxt1); - if (opj_mqc_decode(mqc)) { - OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, flagsp[-1], flagsp[1], - ci); - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); - *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); - } - } - /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ -} - -static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 oneplushalf, - OPJ_UINT32 ci, - OPJ_UINT32 flags_stride) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); - opj_mqc_setcurctx(mqc, ctxt1); - if (opj_mqc_decode(mqc)) { - OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp, flagsp[-1], flagsp[1], - ci); - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); - *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, flags_stride); - } + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, + *flagsp, flagsp, t1->w + 2U, datap, + 0, ci, mqc, mqc->curctx, + v, mqc->a, mqc->c, mqc->ct, oneplushalf); } static void opj_t1_dec_clnpass_step_vsc( @@ -1347,7 +1352,8 @@ static void opj_t1_dec_clnpass_step_vsc( if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); opj_mqc_setcurctx(mqc, ctxt1); - if (opj_mqc_decode(mqc)) { + opj_mqc_decode(v, mqc); + if (v) { OPJ_UINT32 lu; LABEL_PARTIAL: lu = opj_t1_getctxtno_sc_or_spb_index( @@ -1355,7 +1361,8 @@ LABEL_PARTIAL: flagsp[1] & vsc_mask, ci); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); + opj_mqc_decode(v, mqc); + v = v ^ opj_t1_getspb(lu); *datap = v ? -oneplushalf : oneplushalf; opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); } @@ -1454,12 +1461,11 @@ static void opj_t1_enc_clnpass( #define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf, agg, vsc; \ - OPJ_UINT32 runlen; \ + OPJ_UINT32 runlen, v; \ OPJ_UINT32 i, j, k; \ OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \ - \ - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ \ - \ + const OPJ_UINT32 l_w = w; \ + opj_mqc_t* mqc = &(t1->mqc); \ one = 1 << bpno; \ half = one >> 1; \ oneplushalf = one | half; \ @@ -1476,12 +1482,14 @@ static void opj_t1_enc_clnpass( } \ if (agg) { \ opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ - if (!opj_mqc_decode(mqc)) { \ + opj_mqc_decode(v, mqc); \ + if (!v) { \ continue; \ } \ opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ - runlen = opj_mqc_decode(mqc); \ - runlen = (runlen << 1) | opj_mqc_decode(mqc); \ + opj_mqc_decode(runlen, mqc); \ + opj_mqc_decode(v, mqc); \ + runlen = (runlen << 1) | v; \ } else { \ runlen = 0; \ } \ @@ -1500,70 +1508,95 @@ static void opj_t1_enc_clnpass( flagsp1 += flags_stride; \ } \ } else { \ - 
OPJ_INT32 *data1 = t1->data; \ - opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ - for (k = 0; k < (h & ~3u); k += 4) { \ - for (i = 0; i < w; ++i) { \ - OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flagsp2 = flagsp1 + i; \ - if (*flagsp2 == 0) { \ - opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ - if (!opj_mqc_decode(mqc)) { \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + register OPJ_UINT32 v; \ + register OPJ_INT32 *data = t1->data; \ + register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ + for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + opj_flag_t flags = *flagsp; \ + if (flags == 0) { \ + OPJ_UINT32 partial = OPJ_TRUE; \ + opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ + if (!v) { \ continue; \ } \ - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ - runlen = opj_mqc_decode(mqc); \ - runlen = (runlen << 1) | opj_mqc_decode(mqc); \ - data2 += runlen * w; \ - for (j = runlen; j < 4; ++j) { \ - if (j == runlen) { \ - opj_t1_dec_clnpass_step_partial(t1, flagsp2, data2, oneplushalf, j); \ - } else { \ - opj_t1_dec_clnpass_step(t1, flagsp2, data2, oneplushalf, j); \ - } \ - data2 += w; \ + opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \ + opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ + runlen = (runlen << 1) | v; \ + switch(runlen) { \ + case 0: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\ + flags, flagsp, flags_stride, data, \ + l_w, 0, mqc, curctx, \ + v, a, c, ct, oneplushalf); \ + partial = OPJ_FALSE; \ + /* falltrough */ \ + case 1: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ + flags, flagsp, flags_stride, data, \ + l_w, 1, mqc, curctx, \ + v, a, c, ct, oneplushalf); \ + partial = OPJ_FALSE; \ + /* falltrough */ \ + case 2: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ + flags, flagsp, flags_stride, data, \ + l_w, 2, mqc, curctx, \ + v, a, c, ct, 
oneplushalf); \ + partial = OPJ_FALSE; \ + /* falltrough */ \ + case 3: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ + flags, flagsp, flags_stride, data, \ + l_w, 3, mqc, curctx, \ + v, a, c, ct, oneplushalf); \ + break; \ } \ } else { \ - if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (0 * 3U)))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 0U, flags_stride); \ - } \ - data2 += w; \ - if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (1 * 3U)))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 1U, flags_stride); \ - } \ - data2 += w; \ - if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (2 * 3U)))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 2U, flags_stride); \ - } \ - data2 += w; \ - if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (3 * 3U)))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 3U, flags_stride); \ - } \ - data2 += w; \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 0, mqc, curctx, \ + v, a, c, ct, oneplushalf); \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 1, mqc, curctx, \ + v, a, c, ct, oneplushalf); \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 2, mqc, curctx, \ + v, a, c, ct, oneplushalf); \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 3, mqc, curctx, \ + v, a, c, ct, oneplushalf); \ } \ - *flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ + *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ - data1 += w << 2; \ - flagsp1 += flags_stride; \ } \ - for (i = 0; i < w; ++i) { \ - OPJ_INT32 *data2 = data1 + i; \ - opj_flag_t *flagsp2 = flagsp1 + i; \ - for (j = k; j < h; ++j) { \ - opj_t1_dec_clnpass_step(t1, flagsp2, data2, 
oneplushalf, j - k); \ - data2 += w; \ + UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + if( k < h ) { \ + for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \ + for (j = 0; j < h - k; ++j) { \ + opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j); \ + } \ + *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ - *flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ } \ \ if (segsym) { \ - OPJ_UINT32 v = 0; \ + OPJ_UINT32 v, v2; \ opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ - v = opj_mqc_decode(mqc); \ - v = (v << 1) | opj_mqc_decode(mqc); \ - v = (v << 1) | opj_mqc_decode(mqc); \ - v = (v << 1) | opj_mqc_decode(mqc); \ + opj_mqc_decode(v, mqc); \ + opj_mqc_decode(v2, mqc); \ + v = (v << 1) | v2; \ + opj_mqc_decode(v2, mqc); \ + v = (v << 1) | v2; \ + opj_mqc_decode(v2, mqc); \ + v = (v << 1) | v2; \ /* \ if (v!=0xa) { \ opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); \ From dde6cbabc0cb93102c2091c15d11dec1267d0fd8 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 2 Jun 2017 14:25:57 +0200 Subject: [PATCH 16/19] Simplify VSC handling: instead of masking out bits when reading the 4th row. 
Do not set them when updating flags of the 1st row --- src/lib/openjp2/t1.c | 880 +++++++++++++++---------------------------- 1 file changed, 301 insertions(+), 579 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index c2aa0361..da26227d 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -56,7 +56,8 @@ static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f); static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, - OPJ_UINT32 s, OPJ_UINT32 stride); + OPJ_UINT32 s, OPJ_UINT32 stride, + OPJ_UINT32 vsc); /** @@ -68,7 +69,7 @@ static INLINE void opj_t1_dec_sigpass_step_raw( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 vsc, + OPJ_UINT32 vsc, OPJ_UINT32 row); static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, @@ -76,15 +77,8 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_UINT32 row, - OPJ_UINT32 flags_stride); -static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 oneplushalf, - OPJ_INT32 vsc, - OPJ_UINT32 row); - + OPJ_UINT32 flags_stride, + OPJ_UINT32 vsc); /** Encode significant pass @@ -102,11 +96,6 @@ static void opj_t1_dec_sigpass_raw( opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 cblksty); -static void opj_t1_dec_sigpass_mqc_vsc( - opj_t1_t *t1, - OPJ_INT32 bpno); - - /** Encode refinement pass @@ -114,8 +103,7 @@ Encode refinement pass static void opj_t1_enc_refpass(opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 cblksty); + OPJ_BYTE type); /** Decode refinement pass @@ -123,9 +111,6 @@ Decode refinement pass static void opj_t1_dec_refpass_raw( opj_t1_t *t1, OPJ_INT32 bpno); -static void opj_t1_dec_refpass_mqc_vsc( - opj_t1_t *t1, - OPJ_INT32 bpno); /** @@ -144,13 +129,6 @@ static 
INLINE void opj_t1_dec_refpass_step_mqc( OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_UINT32 row); -static INLINE void opj_t1_dec_refpass_step_mqc_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 poshalf, - OPJ_INT32 vsc, - OPJ_UINT32 row); /** @@ -162,15 +140,9 @@ static void opj_t1_dec_clnpass_step( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_UINT32 row); -static void opj_t1_dec_clnpass_step_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 oneplushalf, - OPJ_INT32 partial, - OPJ_INT32 vsc, - OPJ_UINT32 row); + OPJ_UINT32 row, + OPJ_UINT32 vsc); + /** Encode clean-up pass */ @@ -299,7 +271,7 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; } -#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride ) \ +#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \ { \ /* east */ \ flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \ @@ -311,7 +283,7 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) flagsp[1] |= T1_SIGMA_3 << (3U * ci); \ \ /* north-west, north, north-east */ \ - if (ci == 0U) { \ + if (ci == 0U && !(vsc)) { \ opj_flag_t* north = flagsp - (stride); \ *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \ north[-1] |= T1_SIGMA_17; \ @@ -329,9 +301,10 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, - OPJ_UINT32 s, OPJ_UINT32 stride) + OPJ_UINT32 s, OPJ_UINT32 stride, + OPJ_UINT32 vsc) { - opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride); + opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc); } /** @@ -345,16 +318,13 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 ci, - OPJ_UINT32 vsc - ) + OPJ_UINT32 vsc) { OPJ_UINT32 v; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - OPJ_UINT32 vsc_mask = vsc ? 
~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | - T1_CHI_S) << (ci * 3U)) : ~0U; - OPJ_UINT32 const flags = *flagsp & vsc_mask; + OPJ_UINT32 const flags = *flagsp; if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { @@ -370,11 +340,9 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, opj_mqc_encode(mqc, v); } if (v) { - /* Note: using flags instead of *flagsp & vsc_mask result */ - /* in slow down. Probably because of register pressure */ OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp & vsc_mask, - flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, + *flagsp, + flagsp[-1], flagsp[1], ci); OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); v = *datap < 0 ? 1 : 0; @@ -393,7 +361,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, #endif opj_mqc_encode(mqc, v ^ spb); } - opj_t1_update_flags(flagsp, ci, v, t1->w + 2); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); } *flagsp |= T1_PI_THIS << (ci * 3U); } @@ -404,22 +372,20 @@ static INLINE void opj_t1_dec_sigpass_step_raw( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 vsc, + OPJ_UINT32 vsc, OPJ_UINT32 ci) { OPJ_UINT32 v; opj_raw_t *raw = &(t1->raw); /* RAW component */ - OPJ_UINT32 vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | - T1_CHI_S) << (ci * 3U)) : ~0U; - OPJ_UINT32 const flags = *flagsp & vsc_mask; + OPJ_UINT32 const flags = *flagsp; if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { if (opj_raw_decode(raw)) { v = opj_raw_decode(raw); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->w + 2); + opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); } *flagsp |= T1_PI_THIS << (ci * 3U); } @@ -427,7 +393,7 @@ static INLINE void opj_t1_dec_sigpass_step_raw( #define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \ data_stride, ci, mqc, curctx, \ - v, a, c, ct, oneplushalf) \ + v, a, c, ct, oneplushalf, vsc) \ { \ if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \ (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \ @@ -445,7 +411,7 @@ static INLINE void opj_t1_dec_sigpass_step_raw( opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ v = v ^ spb; \ data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \ - opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride); \ + opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \ } \ flags |= T1_PI_THIS << (ci * 3U); \ } \ @@ -457,55 +423,17 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_UINT32 ci, - OPJ_UINT32 flags_stride) + OPJ_UINT32 flags_stride, + OPJ_UINT32 vsc) { OPJ_UINT32 v; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap, 0, ci, mqc, mqc->curctx, - v, mqc->a, mqc->c, mqc->ct, oneplushalf); + v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc); } -static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 oneplushalf, - OPJ_INT32 vsc, - OPJ_UINT32 ci) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 vsc_mask = vsc ? 
~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | - T1_CHI_S) << (ci * 3U)) : ~0U; - OPJ_UINT32 const flags = *flagsp & vsc_mask; - - if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && - (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { - OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp & vsc_mask, - flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, - ci); - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); - opj_mqc_setcurctx(mqc, ctxt1); - opj_mqc_decode(v, mqc); - if (v) { - OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); - OPJ_UINT32 spb = opj_t1_getspb(lu); - opj_mqc_setcurctx(mqc, ctxt2); - opj_mqc_decode(v, mqc); - v = v ^ spb; - *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->w + 2); - } - *flagsp |= T1_PI_THIS << (ci * 3U); - } -} - - static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 *nmsedec, @@ -543,7 +471,7 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, one, nmsedec, type, - 0, 0); + 0, cblksty & J2K_CCP_CBLKSTY_VSC); opj_t1_enc_sigpass_step( t1, f, @@ -570,7 +498,7 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, one, nmsedec, type, - 3, cblksty & J2K_CCP_CBLKSTY_VSC); + 3, 0); ++f; } f += extra; @@ -600,7 +528,7 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, nmsedec, type, j - k, - (j == t1->h - 1 && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); + (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); } ++f; } @@ -614,93 +542,71 @@ static void opj_t1_dec_sigpass_raw( { OPJ_INT32 one, half, oneplushalf; OPJ_UINT32 i, j, k; + OPJ_INT32 *data1 = t1->data; opj_flag_t *flagsp = &T1_FLAGS(0, 0); one = 1 << bpno; half = one >> 1; oneplushalf = one | half; - if ((cblksty & J2K_CCP_CBLKSTY_VSC)) { - OPJ_INT32 *data1 = t1->data; - for (k = 0; k < t1->h; k += 4) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32* data2 = data1 + i; - for (j = k; j < k + 4 && j < t1->h; ++j) { - OPJ_INT32 vsc = (j == k + 3 || j == t1->h - 1) ? 
1 : 0; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - vsc, - j - k); - data2 += t1->w; - } - flagsp ++; - } - data1 += t1->w << 2; - flagsp += 2; + + for (k = 0; k < (t1->h & ~3U); k += 4) { + for (i = 0; i < t1->w; ++i) { + OPJ_INT32* data2 = data1 + i; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data2, + oneplushalf, + cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */ + 0U); + data2 += t1->w; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data2, + oneplushalf, + OPJ_FALSE, /* vsc */ + 1U); + data2 += t1->w; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data2, + oneplushalf, + OPJ_FALSE, /* vsc */ + 2U); + data2 += t1->w; + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data2, + oneplushalf, + OPJ_FALSE, /* vsc */ + 3U); + data2 += t1->w; + flagsp ++; } - } else { - OPJ_INT32 *data1 = t1->data; - for (k = 0; k < (t1->h & ~3U); k += 4) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32* data2 = data1 + i; + data1 += t1->w << 2; + flagsp += 2; + } + if (k < t1->h) { + for (i = 0; i < t1->w; ++i) { + OPJ_INT32* data2 = data1 + i; + for (j = 0; j < t1->h - k; ++j) { opj_t1_dec_sigpass_step_raw( t1, flagsp, data2, oneplushalf, - 0, /* vsc */ - 0U); + cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */ + j); data2 += t1->w; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - 0, /* vsc */ - 1U); - data2 += t1->w; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - 0, /* vsc */ - 2U); - data2 += t1->w; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - 0, /* vsc */ - 3U); - data2 += t1->w; - flagsp ++; - } - data1 += t1->w << 2; - flagsp += 2; - } - if (k < t1->h) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32* data2 = data1 + i; - for (j = 0; j < t1->h - k; ++j) { - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - 0, /* vsc */ - j); - data2 += t1->w; - } - flagsp ++; } + flagsp ++; } } } -#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, w, h, flags_stride) \ +#define 
opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf; \ OPJ_UINT32 i, j, k; \ @@ -719,16 +625,16 @@ static void opj_t1_dec_sigpass_raw( if( flags != 0 ) { \ opj_t1_dec_sigpass_step_mqc_macro( \ flags, flagsp, flags_stride, data, \ - l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf); \ + l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \ opj_t1_dec_sigpass_step_mqc_macro( \ flags, flagsp, flags_stride, data, \ - l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf); \ + l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \ opj_t1_dec_sigpass_step_mqc_macro( \ flags, flagsp, flags_stride, data, \ - l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf); \ + l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \ opj_t1_dec_sigpass_step_mqc_macro( \ flags, flagsp, flags_stride, data, \ - l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf); \ + l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \ *flagsp = flags; \ } \ } \ @@ -738,71 +644,62 @@ static void opj_t1_dec_sigpass_raw( for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ for (j = 0; j < h - k; ++j) { \ opj_t1_dec_sigpass_step_mqc(t1, flagsp, \ - data + j * l_w, oneplushalf, j, flags_stride); \ + data + j * l_w, oneplushalf, j, flags_stride, vsc); \ } \ } \ } \ } -static void opj_t1_dec_sigpass_mqc_64x64( +static void opj_t1_dec_sigpass_mqc_64x64_novsc( opj_t1_t *t1, OPJ_INT32 bpno) { - opj_t1_dec_sigpass_mqc_internal(t1, bpno, 64, 64, 66); + opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66); } -static void opj_t1_dec_sigpass_mqc_generic( +static void opj_t1_dec_sigpass_mqc_64x64_vsc( opj_t1_t *t1, OPJ_INT32 bpno) { - opj_t1_dec_sigpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2); + opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66); } -static void opj_t1_dec_sigpass_mqc_vsc( +static void opj_t1_dec_sigpass_mqc_generic_novsc( opj_t1_t *t1, OPJ_INT32 bpno) { - OPJ_INT32 one, half, oneplushalf, vsc; - OPJ_UINT32 i, j, k; - 
OPJ_INT32 *data1 = t1->data; - opj_flag_t *flagsp = &T1_FLAGS(0, 0); - one = 1 << bpno; - half = one >> 1; - oneplushalf = one | half; - for (k = 0; k < (t1->h & ~3U); k += 4U) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 0, 0U); - data2 += t1->w; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 0, 1U); - data2 += t1->w; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 0, 2U); - data2 += t1->w; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flagsp, data2, oneplushalf, 1, 3U); - data2 += t1->w; - flagsp ++; + opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h, + t1->w + 2U); +} + +static void opj_t1_dec_sigpass_mqc_generic_vsc( + opj_t1_t *t1, + OPJ_INT32 bpno) +{ + opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h, + t1->w + 2U); +} + +static void opj_t1_dec_sigpass_mqc( + opj_t1_t *t1, + OPJ_INT32 bpno, + OPJ_INT32 cblksty) +{ + if (t1->w == 64 && t1->h == 64) { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno); + } else { + opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno); } - data1 += t1->w << 2; - flagsp += 2; - } - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - for (j = k; j < t1->h; ++j) { - vsc = (j == t1->h - 1) ? 1 : 0; - opj_t1_dec_sigpass_step_mqc_vsc( - t1, - flagsp, - data2, - oneplushalf, - vsc, - j - k); - data2 += t1->w; + } else { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno); + } else { + opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno); } - flagsp ++; } } - /** Encode refinement pass step */ @@ -813,17 +710,13 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, OPJ_INT32 one, OPJ_INT32 *nmsedec, OPJ_BYTE type, - OPJ_UINT32 ci, - OPJ_UINT32 vsc) + OPJ_UINT32 ci) { OPJ_UINT32 v; opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_UINT32 const shift_flags = - vsc ? 
- ((*flagsp >> (ci * 3U)) & ~(T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S)) - : (*flagsp >> (ci * 3U)); if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { @@ -892,40 +785,11 @@ static INLINE void opj_t1_dec_refpass_step_mqc( mqc->ct, poshalf); } -static INLINE void opj_t1_dec_refpass_step_mqc_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 poshalf, - OPJ_INT32 vsc, - OPJ_UINT32 ci) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 const shift_flags = - vsc ? - ((*flagsp >> (ci * 3U)) & ~(T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S)) - : - (*flagsp >> (ci * 3U)); - - - if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { - OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); - opj_mqc_setcurctx(mqc, ctxt); - opj_mqc_decode(v, mqc); - *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf; - *flagsp |= T1_MU_THIS << (ci * 3U); - } -} - static void opj_t1_enc_refpass( opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 *nmsedec, - OPJ_BYTE type, - OPJ_UINT32 cblksty) + OPJ_BYTE type) { OPJ_UINT32 i, k; const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); @@ -964,7 +828,7 @@ static void opj_t1_enc_refpass( one, nmsedec, type, - 0, 0); + 0); opj_t1_enc_refpass_step( t1, f, @@ -973,7 +837,7 @@ static void opj_t1_enc_refpass( one, nmsedec, type, - 1, 0); + 1); opj_t1_enc_refpass_step( t1, f, @@ -982,7 +846,7 @@ static void opj_t1_enc_refpass( one, nmsedec, type, - 2, 0); + 2); opj_t1_enc_refpass_step( t1, f, @@ -991,7 +855,7 @@ static void opj_t1_enc_refpass( one, nmsedec, type, - 3, cblksty & J2K_CCP_CBLKSTY_VSC); + 3); ++f; } f += extra; @@ -1020,8 +884,7 @@ static void opj_t1_enc_refpass( one, nmsedec, type, - j - k, - (j == t1->h - 1 && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); + j - k); } ++f; } @@ -1148,51 +1011,14 @@ static void opj_t1_dec_refpass_mqc_generic( opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U); } -static void opj_t1_dec_refpass_mqc_vsc( 
+static void opj_t1_dec_refpass_mqc( opj_t1_t *t1, OPJ_INT32 bpno) { - OPJ_INT32 one, poshalf; - OPJ_UINT32 i, j, k; - OPJ_INT32 vsc; - OPJ_INT32 *data1 = t1->data; - opj_flag_t *flagsp = &T1_FLAGS(0, 0); - one = 1 << bpno; - poshalf = one >> 1; - for (k = 0; k < (t1->h & ~(OPJ_UINT32)3U); k += 4U) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, - 0, 0U); - data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, - 0, 1U); - data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, - 0, 2U); - data2 += t1->w; - opj_t1_dec_refpass_step_mqc_vsc(t1, flagsp, data2, poshalf, - 1, 3U); - data2 += t1->w; - flagsp ++; - } - data1 += t1->w << 2; - flagsp += 2U; - } - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - for (j = k; j < t1->h; ++j) { - vsc = (j == t1->h - 1) ? 1 : 0; - opj_t1_dec_refpass_step_mqc_vsc( - t1, - flagsp, - data2, - poshalf, - vsc, - j - k); - data2 += t1->w; - } - flagsp ++; + if (t1->w == 64 && t1->h == 64) { + opj_t1_dec_refpass_mqc_64x64(t1, bpno); + } else { + opj_t1_dec_refpass_mqc_generic(t1, bpno); } } @@ -1233,13 +1059,9 @@ static void opj_t1_enc_clnpass_step( for (ci = runlen; ci < lim; ++ci) { OPJ_UINT32 vsc; - OPJ_UINT32 vsc_mask; opj_flag_t flags; - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == lim - 1)) ? 1 : 0; - vsc_mask = vsc ? ~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | T1_CHI_S) << - (ci * 3U)) : ~0U; - flags = *flagsp & vsc_mask; + flags = *flagsp; if ((agg != 0) && (ci == runlen)) { goto LABEL_PARTIAL; @@ -1257,11 +1079,9 @@ static void opj_t1_enc_clnpass_step( OPJ_UINT32 ctxt2, spb; OPJ_UINT32 lu; LABEL_PARTIAL: - /* Note: using flags instead of *flagsp & vsc_mask result */ - /* in slow down. 
Probably because of register pressure */ lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp & vsc_mask, - flagsp[-1] & vsc_mask, flagsp[1] & vsc_mask, + *flagsp, + flagsp[-1], flagsp[1], ci); *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), (OPJ_UINT32)bpno); @@ -1277,7 +1097,8 @@ LABEL_PARTIAL: printf(" spb=%d\n", spb); #endif opj_mqc_encode(mqc, v ^ spb); - opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); + vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; + opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); } } *flagsp &= ~(T1_PI_THIS << (3U * ci)); @@ -1288,7 +1109,7 @@ LABEL_PARTIAL: #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \ flags, flagsp, flags_stride, data, \ data_stride, ci, mqc, curctx, \ - v, a, c, ct, oneplushalf) \ + v, a, c, ct, oneplushalf, vsc) \ { \ if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\ do { \ @@ -1307,7 +1128,7 @@ LABEL_PARTIAL: opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ v = v ^ opj_t1_getspb(lu); \ data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \ - opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride); \ + opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \ } \ } while(0); \ } \ @@ -1318,7 +1139,8 @@ static void opj_t1_dec_clnpass_step( opj_flag_t *flagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_UINT32 ci) + OPJ_UINT32 ci, + OPJ_UINT32 vsc) { OPJ_UINT32 v; @@ -1326,48 +1148,7 @@ static void opj_t1_dec_clnpass_step( opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, *flagsp, flagsp, t1->w + 2U, datap, 0, ci, mqc, mqc->curctx, - v, mqc->a, mqc->c, mqc->ct, oneplushalf); -} - -static void opj_t1_dec_clnpass_step_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 oneplushalf, - OPJ_INT32 partial, - OPJ_INT32 vsc, - OPJ_UINT32 ci) -{ - OPJ_UINT32 v; - - opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - - OPJ_UINT32 vsc_mask = vsc ? 
~((T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE | - T1_CHI_S) << - (ci * 3U)) : ~0U; - OPJ_UINT32 flags = *flagsp & vsc_mask; - if (partial) { - goto LABEL_PARTIAL; - } - if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); - opj_mqc_setcurctx(mqc, ctxt1); - opj_mqc_decode(v, mqc); - if (v) { - OPJ_UINT32 lu; -LABEL_PARTIAL: - lu = opj_t1_getctxtno_sc_or_spb_index( - *flagsp & vsc_mask, flagsp[-1] & vsc_mask, - flagsp[1] & vsc_mask, - ci); - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); - opj_mqc_decode(v, mqc); - v = v ^ opj_t1_getspb(lu); - *datap = v ? -oneplushalf : oneplushalf; - opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); - } - } - /* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ + v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc); } static void opj_t1_enc_clnpass( @@ -1382,10 +1163,6 @@ static void opj_t1_enc_clnpass( opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ - const OPJ_UINT32 agg_mask = (cblksty & J2K_CCP_CBLKSTY_VSC) ? 
- ~(T1_SIGMA_15 | T1_SIGMA_16 | T1_SIGMA_17 | T1_CHI_5) : - ~0U; - *nmsedec = 0; #ifdef DEBUG_ENC_CLN printf("enc_clnpass: bpno=%d\n", bpno); @@ -1398,7 +1175,7 @@ static void opj_t1_enc_clnpass( #ifdef DEBUG_ENC_CLN printf(" i=%d\n", i); #endif - agg = !(T1_FLAGS(i, k) & agg_mask); + agg = !(T1_FLAGS(i, k)); #ifdef DEBUG_ENC_CLN printf(" agg=%d\n", agg); #endif @@ -1458,170 +1235,167 @@ static void opj_t1_enc_clnpass( } } -#define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \ +#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \ { \ - OPJ_INT32 one, half, oneplushalf, agg, vsc; \ - OPJ_UINT32 runlen, v; \ + OPJ_INT32 one, half, oneplushalf; \ + OPJ_UINT32 runlen; \ OPJ_UINT32 i, j, k; \ - OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \ const OPJ_UINT32 l_w = w; \ opj_mqc_t* mqc = &(t1->mqc); \ + register OPJ_INT32 *data = t1->data; \ + register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ + DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + register OPJ_UINT32 v; \ one = 1 << bpno; \ half = one >> 1; \ oneplushalf = one | half; \ - if (cblksty & J2K_CCP_CBLKSTY_VSC) { \ - opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ - const OPJ_UINT32 agg_mask = ~(T1_SIGMA_15 | T1_SIGMA_16 | T1_SIGMA_17 | T1_CHI_5); \ - for (k = 0; k < h; k += 4) { \ - for (i = 0; i < w; ++i) { \ - opj_flag_t *flagsp2 = flagsp1 + i; \ - if (k + 3 < h) { \ - agg = !(*flagsp2 & agg_mask); \ - } else { \ - agg = 0; \ - } \ - if (agg) { \ - opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ - opj_mqc_decode(v, mqc); \ + for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + opj_flag_t flags = *flagsp; \ + if (flags == 0) { \ + OPJ_UINT32 partial = OPJ_TRUE; \ + opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ if (!v) { \ continue; \ } \ - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ - opj_mqc_decode(runlen, mqc); \ - opj_mqc_decode(v, mqc); \ + 
opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \ + opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \ + opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ runlen = (runlen << 1) | v; \ + switch(runlen) { \ + case 0: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\ + flags, flagsp, flags_stride, data, \ + l_w, 0, mqc, curctx, \ + v, a, c, ct, oneplushalf, vsc); \ + partial = OPJ_FALSE; \ + /* falltrough */ \ + case 1: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ + flags, flagsp, flags_stride, data, \ + l_w, 1, mqc, curctx, \ + v, a, c, ct, oneplushalf, OPJ_FALSE); \ + partial = OPJ_FALSE; \ + /* falltrough */ \ + case 2: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ + flags, flagsp, flags_stride, data, \ + l_w, 2, mqc, curctx, \ + v, a, c, ct, oneplushalf, OPJ_FALSE); \ + partial = OPJ_FALSE; \ + /* falltrough */ \ + case 3: \ + opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ + flags, flagsp, flags_stride, data, \ + l_w, 3, mqc, curctx, \ + v, a, c, ct, oneplushalf, OPJ_FALSE); \ + break; \ + } \ } else { \ - runlen = 0; \ - } \ - for (j = k + runlen; j < k + 4 && j < h; ++j) { \ - vsc = (j == k + 3 || j == h - 1) ? 
1 : 0; \ - opj_t1_dec_clnpass_step_vsc( \ - t1, \ - flagsp2, \ - &t1->data[(j * w) + i], \ - oneplushalf, \ - agg && (j == k + runlen), \ - vsc, j - k); \ - } \ - *flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ - } \ - flagsp1 += flags_stride; \ - } \ - } else { \ - DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ - register OPJ_UINT32 v; \ - register OPJ_INT32 *data = t1->data; \ - register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ - for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \ - for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ - opj_flag_t flags = *flagsp; \ - if (flags == 0) { \ - OPJ_UINT32 partial = OPJ_TRUE; \ - opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \ - opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ - if (!v) { \ - continue; \ - } \ - opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \ - opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \ - opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \ - runlen = (runlen << 1) | v; \ - switch(runlen) { \ - case 0: \ - opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\ - flags, flagsp, flags_stride, data, \ - l_w, 0, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - partial = OPJ_FALSE; \ - /* falltrough */ \ - case 1: \ - opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ - flags, flagsp, flags_stride, data, \ - l_w, 1, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - partial = OPJ_FALSE; \ - /* falltrough */ \ - case 2: \ - opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ - flags, flagsp, flags_stride, data, \ - l_w, 2, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - partial = OPJ_FALSE; \ - /* falltrough */ \ - case 3: \ - opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\ - flags, flagsp, flags_stride, data, \ - l_w, 3, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - break; \ - } \ - } else { \ - opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ - flags, flagsp, flags_stride, data, \ - l_w, 0, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - 
opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ - flags, flagsp, flags_stride, data, \ - l_w, 1, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ - flags, flagsp, flags_stride, data, \ - l_w, 2, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ - flags, flagsp, flags_stride, data, \ - l_w, 3, mqc, curctx, \ - v, a, c, ct, oneplushalf); \ - } \ - *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ - } \ - } \ - UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ - if( k < h ) { \ - for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \ - for (j = 0; j < h - k; ++j) { \ - opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j); \ - } \ - *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 0, mqc, curctx, \ + v, a, c, ct, oneplushalf, vsc); \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 1, mqc, curctx, \ + v, a, c, ct, oneplushalf, OPJ_FALSE); \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 2, mqc, curctx, \ + v, a, c, ct, oneplushalf, OPJ_FALSE); \ + opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \ + flags, flagsp, flags_stride, data, \ + l_w, 3, mqc, curctx, \ + v, a, c, ct, oneplushalf, OPJ_FALSE); \ } \ + *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ } \ - \ - if (segsym) { \ - OPJ_UINT32 v, v2; \ - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ - opj_mqc_decode(v, mqc); \ - opj_mqc_decode(v2, mqc); \ - v = (v << 1) | v2; \ - opj_mqc_decode(v2, mqc); \ - v = (v << 1) | v2; \ - opj_mqc_decode(v2, mqc); \ - v = (v << 1) | v2; \ - /* \ - if (v!=0xa) { \ - opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); \ + UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ + if( k < h ) { \ + for (i = 0; i < l_w; ++i, 
++flagsp, ++data) { \ + for (j = 0; j < h - k; ++j) { \ + opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \ + } \ + *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ } \ - */ \ } \ } -static void opj_t1_dec_clnpass_64x64( - opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 cblksty) +static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty) { - opj_t1_dec_clnpass_internal(t1, bpno, cblksty, 64, 64, 66); + if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) { + opj_mqc_t* mqc = &(t1->mqc); + OPJ_UINT32 v, v2; + opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); + opj_mqc_decode(v, mqc); + opj_mqc_decode(v2, mqc); + v = (v << 1) | v2; + opj_mqc_decode(v2, mqc); + v = (v << 1) | v2; + opj_mqc_decode(v2, mqc); + v = (v << 1) | v2; + /* + if (v!=0xa) { + opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); + } + */ + } } -static void opj_t1_dec_clnpass_generic( +static void opj_t1_dec_clnpass_64x64_novsc( opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 cblksty) + OPJ_INT32 bpno) { - opj_t1_dec_clnpass_internal(t1, bpno, cblksty, t1->w, t1->h, + opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66); +} + +static void opj_t1_dec_clnpass_64x64_vsc( + opj_t1_t *t1, + OPJ_INT32 bpno) +{ + opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66); +} + +static void opj_t1_dec_clnpass_generic_novsc( + opj_t1_t *t1, + OPJ_INT32 bpno) +{ + opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h, t1->w + 2U); } +static void opj_t1_dec_clnpass_generic_vsc( + opj_t1_t *t1, + OPJ_INT32 bpno) +{ + opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h, + t1->w + 2U); +} + +static void opj_t1_dec_clnpass( + opj_t1_t *t1, + OPJ_INT32 bpno, + OPJ_INT32 cblksty) +{ + if (t1->w == 64 && t1->h == 64) { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_clnpass_64x64_vsc(t1, bpno); + } else { + opj_t1_dec_clnpass_64x64_novsc(t1, bpno); + } + } else { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_clnpass_generic_vsc(t1, bpno); + } 
else { + opj_t1_dec_clnpass_generic_novsc(t1, bpno); + } + } + opj_t1_dec_clnpass_check_segsym(t1, cblksty); +} + /** mod fixed_quality */ static OPJ_FLOAT64 opj_t1_getwmsedec( @@ -2063,89 +1837,37 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } } - if (t1->w == 64 && t1->h == 64) { - for (passno = 0; (passno < seg->real_num_passes) && - (bpno_plus_one >= 1); ++passno) { - switch (passtype) { - case 0: - if (type == T1_TYPE_RAW) { - opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); - } else { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one); - } else { - opj_t1_dec_sigpass_mqc_64x64(t1, bpno_plus_one); - } - } - break; - case 1: - if (type == T1_TYPE_RAW) { - opj_t1_dec_refpass_raw(t1, bpno_plus_one); - } else { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one); - } else { - opj_t1_dec_refpass_mqc_64x64(t1, bpno_plus_one); - } - } - break; - case 2: - opj_t1_dec_clnpass_64x64(t1, bpno_plus_one, (OPJ_INT32)cblksty); - break; + for (passno = 0; (passno < seg->real_num_passes) && + (bpno_plus_one >= 1); ++passno) { + switch (passtype) { + case 0: + if (type == T1_TYPE_RAW) { + opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); + } else { + opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty); } - - if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) { - opj_mqc_resetstates(mqc); - opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); - opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); - opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); - } - if (++passtype == 3) { - passtype = 0; - bpno_plus_one--; + break; + case 1: + if (type == T1_TYPE_RAW) { + opj_t1_dec_refpass_raw(t1, bpno_plus_one); + } else { + opj_t1_dec_refpass_mqc(t1, bpno_plus_one); } + break; + case 2: + opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty); + break; } - } else { - for (passno = 0; (passno < seg->real_num_passes) && - (bpno_plus_one >= 1); ++passno) { - switch (passtype) { - case 0: - if 
(type == T1_TYPE_RAW) { - opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); - } else { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one); - } else { - opj_t1_dec_sigpass_mqc_generic(t1, bpno_plus_one); - } - } - break; - case 1: - if (type == T1_TYPE_RAW) { - opj_t1_dec_refpass_raw(t1, bpno_plus_one); - } else { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one); - } else { - opj_t1_dec_refpass_mqc_generic(t1, bpno_plus_one); - } - } - break; - case 2: - opj_t1_dec_clnpass_generic(t1, bpno_plus_one, (OPJ_INT32)cblksty); - break; - } - if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) { - opj_mqc_resetstates(mqc); - opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); - opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); - opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); - } - if (++passtype == 3) { - passtype = 0; - bpno_plus_one--; - } + if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) { + opj_mqc_resetstates(mqc); + opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); + opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); + opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); + } + if (++passtype == 3) { + passtype = 0; + bpno_plus_one--; } } } @@ -2317,7 +2039,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty); break; case 1: - opj_t1_enc_refpass(t1, bpno, &nmsedec, type, cblksty); + opj_t1_enc_refpass(t1, bpno, &nmsedec, type); break; case 2: opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty); From 9b39fc4bccb3e9e4e9a9f61c4dc672fb7f05d865 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 2 Jun 2017 14:32:12 +0200 Subject: [PATCH 17/19] Fix documentation of opj_t1_decode_cblks() --- src/lib/openjp2/t1.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 17123265..24acb939 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -224,7 +224,8 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t 
*t1, /** Decode the code-blocks of a tile -@param t1 T1 handle +@param tp Thread pool +@param pret Pointer to return value @param tilec The tile to decode @param tccp Tile coding parameters */ From 532243f1fd9997db63ea7f6b199d21138ccf58a3 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 2 Jun 2017 16:49:26 +0200 Subject: [PATCH 18/19] MQC/RAW decoder: use an artificial 0xFF 0xFF terminating marker. This saves comparing the current pointer with the end of buffer pointer. This results in at least a tiny speed improvement for raw decoding, and smaller code size for MQC as well. This kills the remains of the raw.h/.c files that were only used for decoding. Encoding was already using the mqc structure. --- src/lib/openjp2/CMakeLists.txt | 2 - src/lib/openjp2/j2k.h | 2 - src/lib/openjp2/mqc.c | 45 ++++++++-- src/lib/openjp2/mqc.h | 64 ++++++++++++- src/lib/openjp2/mqc_inl.h | 47 +++++++--- src/lib/openjp2/{raw_inl.h => opj_common.h} | 38 +++----- src/lib/openjp2/opj_includes.h | 1 - src/lib/openjp2/raw.c | 65 -------------- src/lib/openjp2/raw.h | 99 --------------------- src/lib/openjp2/t1.c | 23 ++--- src/lib/openjp2/t1.h | 2 - src/lib/openjp2/t2.c | 13 ++- src/lib/openjp2/tcd.c | 5 +- 13 files changed, 173 insertions(+), 233 deletions(-) rename src/lib/openjp2/{raw_inl.h => opj_common.h} (65%) delete mode 100644 src/lib/openjp2/raw.c delete mode 100644 src/lib/openjp2/raw.h diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt index cfc49028..ad77c6e3 100644 --- a/src/lib/openjp2/CMakeLists.txt +++ b/src/lib/openjp2/CMakeLists.txt @@ -38,8 +38,6 @@ set(OPENJPEG_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/opj_clock.h ${CMAKE_CURRENT_SOURCE_DIR}/pi.c ${CMAKE_CURRENT_SOURCE_DIR}/pi.h - ${CMAKE_CURRENT_SOURCE_DIR}/raw.c - ${CMAKE_CURRENT_SOURCE_DIR}/raw.h ${CMAKE_CURRENT_SOURCE_DIR}/t1.c ${CMAKE_CURRENT_SOURCE_DIR}/t1.h ${CMAKE_CURRENT_SOURCE_DIR}/t2.c diff --git a/src/lib/openjp2/j2k.h b/src/lib/openjp2/j2k.h index fe65cd87..b59477bd 100644 ---
a/src/lib/openjp2/j2k.h +++ b/src/lib/openjp2/j2k.h @@ -65,8 +65,6 @@ The functions in J2K.C have for goal to read/write the several parts of the code #define J2K_CCP_QNTSTY_SIQNT 1 #define J2K_CCP_QNTSTY_SEQNT 2 -#define OPJ_J2K_DEFAULT_CBLK_DATA_SIZE 8192 - /* ----------------------------------------------------------------------- */ #define J2K_MS_SOC 0xff4f /**< SOC marker value */ diff --git a/src/lib/openjp2/mqc.c b/src/lib/openjp2/mqc.c index bd4054c7..c4658cba 100644 --- a/src/lib/openjp2/mqc.c +++ b/src/lib/openjp2/mqc.c @@ -38,6 +38,8 @@ #include "opj_includes.h" +#include + /** @defgroup MQC MQC - Implementation of an MQ-Coder */ /*@{*/ @@ -423,17 +425,35 @@ void opj_mqc_segmark_enc(opj_mqc_t *mqc) } } -OPJ_BOOL opj_mqc_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len) +static void opj_mqc_init_dec_common(opj_mqc_t *mqc, + OPJ_BYTE *bp, + OPJ_UINT32 len, + OPJ_UINT32 extra_writable_bytes) +{ + (void)extra_writable_bytes; + + assert(extra_writable_bytes >= OPJ_COMMON_CBLK_DATA_EXTRA); + mqc->start = bp; + mqc->end = bp + len; + /* Insert an artificial 0xFF 0xFF marker at end of the code block */ + /* data so that the bytein routines stop on it. This saves us comparing */ + /* the bp and end pointers */ + /* But before inserting it, backup th bytes we will overwrite */ + memcpy(mqc->backup, mqc->end, OPJ_COMMON_CBLK_DATA_EXTRA); + mqc->end[0] = 0xFF; + mqc->end[1] = 0xFF; + mqc->bp = bp; +} +void opj_mqc_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len, + OPJ_UINT32 extra_writable_bytes) { /* Implements ISO 15444-1 C.3.5 Initialization of the decoder (INITDEC) */ /* Note: alternate "J.1 - Initialization of the software-conventions */ /* decoder" has been tried, but does */ /* not bring any improvement. 
*/ /* See https://github.com/uclouvain/openjpeg/issues/921 */ + opj_mqc_init_dec_common(mqc, bp, len, extra_writable_bytes); opj_mqc_setcurctx(mqc, 0); - mqc->start = bp; - mqc->end = bp + len; - mqc->bp = bp; if (len == 0) { mqc->c = 0xff << 16; } else { @@ -444,7 +464,22 @@ OPJ_BOOL opj_mqc_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len) mqc->c <<= 7; mqc->ct -= 7; mqc->a = 0x8000; - return OPJ_TRUE; +} + + +void opj_mqc_raw_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len, + OPJ_UINT32 extra_writable_bytes) +{ + opj_mqc_init_dec_common(mqc, bp, len, extra_writable_bytes); + mqc->c = 0; + mqc->ct = 0; +} + + +void opq_mqc_finish_dec(opj_mqc_t *mqc) +{ + /* Restore the bytes overwritten by opj_mqc_init_dec_common() */ + memcpy(mqc->end, mqc->backup, OPJ_COMMON_CBLK_DATA_EXTRA); } void opj_mqc_resetstates(opj_mqc_t *mqc) diff --git a/src/lib/openjp2/mqc.h b/src/lib/openjp2/mqc.h index 1c0d848b..a29ecb67 100644 --- a/src/lib/openjp2/mqc.h +++ b/src/lib/openjp2/mqc.h @@ -38,6 +38,9 @@ #ifndef __MQC_H #define __MQC_H + +#include "opj_common.h" + /** @file mqc.h @brief Implementation of an MQ-Coder (MQC) @@ -69,16 +72,26 @@ typedef struct opj_mqc_state { MQ coder */ typedef struct opj_mqc { + /** temporary buffer where bits are coded or decoded */ OPJ_UINT32 c; + /** only used by MQ decoder */ OPJ_UINT32 a; + /** number of bits already read or free to write */ OPJ_UINT32 ct; + /** pointer to the current position in the buffer */ OPJ_BYTE *bp; + /** pointer to the start of the buffer */ OPJ_BYTE *start; + /** pointer to the end of the buffer */ OPJ_BYTE *end; + /** Array of contexts */ opj_mqc_state_t *ctxs[MQC_NUMCTXS]; + /** Active context */ opj_mqc_state_t **curctx; /* lut_ctxno_zc shifted by (1 << 9) * bandno */ const OPJ_BYTE* lut_ctxno_zc_orient; + /** Original value of the 2 bytes at end[0] and end[1] */ + OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA]; } opj_mqc_t; #include "mqc_inl.h" @@ -179,13 +192,60 @@ SEGMARK mode switch (SEGSYM) @param mqc MQC 
handle */ void opj_mqc_segmark_enc(opj_mqc_t *mqc); + /** -Initialize the decoder +Initialize the decoder for MQ decoding. + +opj_mqc_finish_dec() must be absolutely called after finishing the decoding +passes, so as to restore the bytes temporarily overwritten. + @param mqc MQC handle @param bp Pointer to the start of the buffer from which the bytes will be read + Note that OPJ_COMMON_CBLK_DATA_EXTRA bytes at the end of the buffer + will be temporarily overwritten with an artificial 0xFF 0xFF marker. + (they will be backuped in the mqc structure to be restored later) + So bp must be at least len + OPJ_COMMON_CBLK_DATA_EXTRA large, and + writable. @param len Length of the input buffer +@param extra_writable_bytes Indicate how many bytes after len are writable. + This is to indicate your consent that bp must be + large enough. */ -OPJ_BOOL opj_mqc_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len); +void opj_mqc_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len, + OPJ_UINT32 extra_writable_bytes); + +/** +Initialize the decoder for RAW decoding. + +opj_mqc_finish_dec() must be absolutely called after finishing the decoding +passes, so as to restore the bytes temporarily overwritten. + +@param mqc MQC handle +@param bp Pointer to the start of the buffer from which the bytes will be read + Note that OPJ_COMMON_CBLK_DATA_EXTRA bytes at the end of the buffer + will be temporarily overwritten with an artificial 0xFF 0xFF marker. + (they will be backuped in the mqc structure to be restored later) + So bp must be at least len + OPJ_COMMON_CBLK_DATA_EXTRA large, and + writable. +@param len Length of the input buffer +@param extra_writable_bytes Indicate how many bytes after len are writable. + This is to indicate your consent that bp must be + large enough. 
+*/ +void opj_mqc_raw_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len, + OPJ_UINT32 extra_writable_bytes); + + +/** +Terminate RAW/MQC decoding + +This restores the bytes temporarily overwritten by opj_mqc_init_dec()/ +opj_mqc_raw_init_dec() + +@param mqc MQC handle +*/ +void opq_mqc_finish_dec(opj_mqc_t *mqc); + /** Decode a symbol @param mqc MQC handle diff --git a/src/lib/openjp2/mqc_inl.h b/src/lib/openjp2/mqc_inl.h index b2c7eef3..095a9fcf 100644 --- a/src/lib/openjp2/mqc_inl.h +++ b/src/lib/openjp2/mqc_inl.h @@ -65,15 +65,46 @@ } \ } + +/** +Decode a symbol using raw-decoder. Cfr p.506 TAUBMAN +@param mqc MQC handle +@return Returns the decoded symbol (0 or 1) +*/ +static INLINE OPJ_UINT32 opj_mqc_raw_decode(opj_mqc_t *mqc) +{ + OPJ_UINT32 d; + if (mqc->ct == 0) { + /* Given opj_mqc_raw_init_dec() we know that at some point we will */ + /* have a 0xFF 0xFF artificial marker */ + if (mqc->c == 0xff) { + if (*mqc->bp > 0x8f) { + mqc->c = 0xff; + mqc->ct = 8; + } else { + mqc->c = *mqc->bp; + mqc->bp ++; + mqc->ct = 7; + } + } else { + mqc->c = *mqc->bp; + mqc->bp ++; + mqc->ct = 8; + } + } + mqc->ct--; + d = ((OPJ_UINT32)mqc->c >> mqc->ct) & 0x01U; + + return d; +} + + #define opj_mqc_bytein_macro(mqc, c, ct) \ { \ - if (mqc->bp != mqc->end) { \ OPJ_UINT32 l_c; \ - if (mqc->bp + 1 != mqc->end) { \ - l_c = *(mqc->bp + 1); \ - } else { \ - l_c = 0xff; \ - } \ + /* Given opj_mqc_init_dec() we know that at some point we will */ \ + /* have a 0xFF 0xFF artificial marker */ \ + l_c = *(mqc->bp + 1); \ if (*mqc->bp == 0xff) { \ if (l_c > 0x8f) { \ c += 0xff00; \ @@ -88,10 +119,6 @@ c += l_c << 8; \ ct = 8; \ } \ - } else { \ - c += 0xff00; \ - ct = 8; \ - } \ } /* For internal use of opj_mqc_decode_macro() */ diff --git a/src/lib/openjp2/raw_inl.h b/src/lib/openjp2/opj_common.h similarity index 65% rename from src/lib/openjp2/raw_inl.h rename to src/lib/openjp2/opj_common.h index b2ec0591..8db83fc5 100644 --- a/src/lib/openjp2/raw_inl.h +++ 
b/src/lib/openjp2/opj_common.h @@ -4,11 +4,7 @@ * party and contributor rights, including patent rights, and no such rights * are granted under this license. * - * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium - * Copyright (c) 2002-2014, Professor Benoit Macq - * Copyright (c) 2003-2007, Francois-Olivier Devaux - * Copyright (c) 2003-2014, Antonin Descampe - * Copyright (c) 2005, Herve Drolon, FreeImage Team + * Copyright (c) 2017, IntoPIX SA * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,29 +28,15 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ +#ifndef OPJ_COMMMON_H +#define OPJ_COMMMON_H -/** -Decode a symbol using raw-decoder. Cfr p.506 TAUBMAN -@param raw RAW handle -@return Returns the decoded symbol (0 or 1) +/* + ========================================================== + Common constants shared among several modules + ========================================================== */ -static INLINE OPJ_UINT32 opj_raw_decode(opj_raw_t *raw) -{ - OPJ_UINT32 d; - if (raw->ct == 0) { - raw->ct = 8; - if (raw->len == raw->lenmax) { - raw->c = 0xff; - } else { - if (raw->c == 0xff) { - raw->ct = 7; - } - raw->c = *(raw->start + raw->len); - raw->len++; - } - } - raw->ct--; - d = ((OPJ_UINT32)raw->c >> raw->ct) & 0x01U; +#define OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE 8192 +#define OPJ_COMMON_CBLK_DATA_EXTRA 2 /**< Margin for a fake FFFF marker */ - return d; -} +#endif /* OPJ_COMMMON_H */ diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index 98ade3fd..49aa3322 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -207,7 +207,6 @@ typedef unsigned int OPJ_BITFIELD; #include "jp2.h" #include "mqc.h" -#include "raw.h" #include "bio.h" #include "pi.h" diff --git a/src/lib/openjp2/raw.c b/src/lib/openjp2/raw.c deleted file mode 100644 index 03cc90c0..00000000 --- 
a/src/lib/openjp2/raw.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * The copyright in this software is being made available under the 2-clauses - * BSD License, included below. This software may be subject to other third - * party and contributor rights, including patent rights, and no such rights - * are granted under this license. - * - * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium - * Copyright (c) 2002-2014, Professor Benoit Macq - * Copyright (c) 2003-2007, Francois-Olivier Devaux - * Copyright (c) 2003-2014, Antonin Descampe - * Copyright (c) 2005, Herve Drolon, FreeImage Team - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "opj_includes.h" - -/* -========================================================== - local functions -========================================================== -*/ - - -/* -========================================================== - RAW encoding interface -========================================================== -*/ - -OPJ_UINT32 opj_raw_numbytes(opj_raw_t *raw) -{ - const ptrdiff_t diff = raw->bp - raw->start; - assert(diff <= (ptrdiff_t)0xffffffff && diff >= 0); /* UINT32_MAX */ - return (OPJ_UINT32)diff; -} - -void opj_raw_init_dec(opj_raw_t *raw, OPJ_BYTE *bp, OPJ_UINT32 len) -{ - raw->start = bp; - raw->lenmax = len; - raw->len = 0; - raw->c = 0; - raw->ct = 0; -} diff --git a/src/lib/openjp2/raw.h b/src/lib/openjp2/raw.h deleted file mode 100644 index 1f55ed5e..00000000 --- a/src/lib/openjp2/raw.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * The copyright in this software is being made available under the 2-clauses - * BSD License, included below. This software may be subject to other third - * party and contributor rights, including patent rights, and no such rights - * are granted under this license. - * - * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium - * Copyright (c) 2002-2014, Professor Benoit Macq - * Copyright (c) 2003-2007, Francois-Olivier Devaux - * Copyright (c) 2003-2014, Antonin Descampe - * Copyright (c) 2005, Herve Drolon, FreeImage Team - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __RAW_H -#define __RAW_H -/** -@file raw.h -@brief Implementation of operations for raw encoding (RAW) - -The functions in RAW.C have for goal to realize the operation of raw encoding linked -with the corresponding mode switch. 
-*/ - -/** @defgroup RAW RAW - Implementation of operations for raw encoding */ -/*@{*/ - -/** -RAW encoding operations -*/ -typedef struct opj_raw { - /** temporary buffer where bits are coded or decoded */ - OPJ_BYTE c; - /** number of bits already read or free to write */ - OPJ_UINT32 ct; - /** maximum length to decode */ - OPJ_UINT32 lenmax; - /** length decoded */ - OPJ_UINT32 len; - /** pointer to the current position in the buffer */ - OPJ_BYTE *bp; - /** pointer to the start of the buffer */ - OPJ_BYTE *start; - /** pointer to the end of the buffer */ - OPJ_BYTE *end; -} opj_raw_t; - -#include "raw_inl.h" - -/** @name Exported functions */ -/*@{*/ -/* ----------------------------------------------------------------------- */ - -/** -Return the number of bytes written/read since initialisation -@param raw RAW handle to destroy -@return Returns the number of bytes already encoded -*/ -OPJ_UINT32 opj_raw_numbytes(opj_raw_t *raw); -/** -Initialize the decoder -@param raw RAW handle -@param bp Pointer to the start of the buffer from which the bytes will be read -@param len Length of the input buffer -*/ -void opj_raw_init_dec(opj_raw_t *raw, OPJ_BYTE *bp, OPJ_UINT32 len); -/** -Decode a symbol using raw-decoder. 
Cfr p.506 TAUBMAN -@param raw RAW handle -@return Returns the decoded symbol (0 or 1) -*/ -static INLINE OPJ_UINT32 opj_raw_decode(opj_raw_t *raw); -/* ----------------------------------------------------------------------- */ -/*@}*/ - -/*@}*/ - -#endif /* __RAW_H */ diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index da26227d..924403bf 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -376,14 +376,14 @@ static INLINE void opj_t1_dec_sigpass_step_raw( OPJ_UINT32 ci) { OPJ_UINT32 v; - opj_raw_t *raw = &(t1->raw); /* RAW component */ + opj_mqc_t *mqc = &(t1->mqc); /* RAW component */ OPJ_UINT32 const flags = *flagsp; if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { - if (opj_raw_decode(raw)) { - v = opj_raw_decode(raw); + if (opj_mqc_raw_decode(mqc)) { + v = opj_mqc_raw_decode(mqc); *datap = v ? -oneplushalf : oneplushalf; opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); } @@ -747,11 +747,11 @@ static INLINE void opj_t1_dec_refpass_step_raw( { OPJ_UINT32 v; - opj_raw_t *raw = &(t1->raw); /* RAW component */ + opj_mqc_t *mqc = &(t1->mqc); /* RAW component */ if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == (T1_SIGMA_THIS << (ci * 3U))) { - v = opj_raw_decode(raw); + v = opj_mqc_raw_decode(mqc); *datap += (v ^ (*datap < 0)) ? 
poshalf : -poshalf; *flagsp |= T1_MU_THIS << (ci * 3U); } @@ -1793,7 +1793,6 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, OPJ_UINT32 roishift, OPJ_UINT32 cblksty) { - opj_raw_t *raw = &(t1->raw); /* RAW component */ opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ OPJ_INT32 bpno_plus_one; @@ -1829,12 +1828,11 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, continue; } if (type == T1_TYPE_RAW) { - opj_raw_init_dec(raw, (*seg->data) + seg->dataindex, seg->len); + opj_mqc_raw_init_dec(mqc, (*seg->data) + seg->dataindex, seg->len, + OPJ_COMMON_CBLK_DATA_EXTRA); } else { - if (OPJ_FALSE == opj_mqc_init_dec(mqc, (*seg->data) + seg->dataindex, - seg->len)) { - return OPJ_FALSE; - } + opj_mqc_init_dec(mqc, (*seg->data) + seg->dataindex, seg->len, + OPJ_COMMON_CBLK_DATA_EXTRA); } for (passno = 0; (passno < seg->real_num_passes) && @@ -1870,7 +1868,10 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, bpno_plus_one--; } } + + opq_mqc_finish_dec(mqc); } + return OPJ_TRUE; } diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 24acb939..9d4245eb 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -185,8 +185,6 @@ typedef struct opj_t1 { /** MQC component */ opj_mqc_t mqc; - /** RAW component */ - opj_raw_t raw; OPJ_INT32 *data; /** Flags used by decoder and encoder. 
diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c index b0990963..7d27f688 100644 --- a/src/lib/openjp2/t2.c +++ b/src/lib/openjp2/t2.c @@ -38,6 +38,8 @@ */ #include "opj_includes.h" +#include "opj_common.h" + /** @defgroup T2 T2 - Implementation of a tier-2 coding */ /*@{*/ @@ -1233,7 +1235,8 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, #endif /* USE_JPWL */ /* Check possible overflow on size */ - if ((l_cblk->data_current_size + l_seg->newlen) < l_cblk->data_current_size) { + if ((l_cblk->data_current_size + l_seg->newlen + OPJ_COMMON_CBLK_DATA_EXTRA) < + l_cblk->data_current_size) { opj_event_msg(p_manager, EVT_ERROR, "read: segment too long (%d) with current size (%d > %d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n", l_seg->newlen, l_cblk->data_current_size, 0xFFFFFFFF - l_seg->newlen, cblkno, @@ -1241,9 +1244,10 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, return OPJ_FALSE; } /* Check if the cblk->data have allocated enough memory */ - if ((l_cblk->data_current_size + l_seg->newlen) > l_cblk->data_max_size) { + if ((l_cblk->data_current_size + l_seg->newlen + OPJ_COMMON_CBLK_DATA_EXTRA) > + l_cblk->data_max_size) { OPJ_BYTE* new_cblk_data = (OPJ_BYTE*) opj_realloc(l_cblk->data, - l_cblk->data_current_size + l_seg->newlen); + l_cblk->data_current_size + l_seg->newlen + OPJ_COMMON_CBLK_DATA_EXTRA); if (! 
new_cblk_data) { opj_free(l_cblk->data); l_cblk->data = NULL; @@ -1251,7 +1255,8 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, /* opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to realloc code block cata!\n"); */ return OPJ_FALSE; } - l_cblk->data_max_size = l_cblk->data_current_size + l_seg->newlen; + l_cblk->data_max_size = l_cblk->data_current_size + l_seg->newlen + + OPJ_COMMON_CBLK_DATA_EXTRA; l_cblk->data = new_cblk_data; } diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index f1e1c1c3..dc5c89b9 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -39,6 +39,7 @@ */ #include "opj_includes.h" +#include "opj_common.h" /* ----------------------------------------------------------------------- */ @@ -1185,11 +1186,11 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * { if (! p_code_block->data) { - p_code_block->data = (OPJ_BYTE*) opj_malloc(OPJ_J2K_DEFAULT_CBLK_DATA_SIZE); + p_code_block->data = (OPJ_BYTE*) opj_malloc(OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE); if (! 
p_code_block->data) { return OPJ_FALSE; } - p_code_block->data_max_size = OPJ_J2K_DEFAULT_CBLK_DATA_SIZE; + p_code_block->data_max_size = OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE; /*fprintf(stderr, "Allocate 8192 elements of code_block->data\n");*/ p_code_block->segs = (opj_tcd_seg_t *) opj_calloc(OPJ_J2K_DEFAULT_NB_SEGS, From 9a9b06911e164bdc854cf6b9c3bc5b6e751bf46d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 2 Jun 2017 19:22:15 +0200 Subject: [PATCH 19/19] opj_t1_dec_sigpass_raw/opj_t1_dec_refpass_raw: harmonize style with mqc methods --- src/lib/openjp2/t1.c | 164 ++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 89 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 924403bf..e9d44e01 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -542,66 +542,59 @@ static void opj_t1_dec_sigpass_raw( { OPJ_INT32 one, half, oneplushalf; OPJ_UINT32 i, j, k; - OPJ_INT32 *data1 = t1->data; + OPJ_INT32 *data = t1->data; opj_flag_t *flagsp = &T1_FLAGS(0, 0); + const OPJ_UINT32 l_w = t1->w; one = 1 << bpno; half = one >> 1; oneplushalf = one | half; - for (k = 0; k < (t1->h & ~3U); k += 4) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32* data2 = data1 + i; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */ - 0U); - data2 += t1->w; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - OPJ_FALSE, /* vsc */ - 1U); - data2 += t1->w; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - OPJ_FALSE, /* vsc */ - 2U); - data2 += t1->w; - opj_t1_dec_sigpass_step_raw( - t1, - flagsp, - data2, - oneplushalf, - OPJ_FALSE, /* vsc */ - 3U); - data2 += t1->w; - flagsp ++; + for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) { + for (i = 0; i < l_w; ++i, ++flagsp, ++data) { + opj_flag_t flags = *flagsp; + if (flags != 0) { + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data, + oneplushalf, + cblksty & 
J2K_CCP_CBLKSTY_VSC, /* vsc */ + 0U); + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data + l_w, + oneplushalf, + OPJ_FALSE, /* vsc */ + 1U); + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data + 2 * l_w, + oneplushalf, + OPJ_FALSE, /* vsc */ + 2U); + opj_t1_dec_sigpass_step_raw( + t1, + flagsp, + data + 3 * l_w, + oneplushalf, + OPJ_FALSE, /* vsc */ + 3U); + } } - data1 += t1->w << 2; - flagsp += 2; } if (k < t1->h) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32* data2 = data1 + i; + for (i = 0; i < l_w; ++i, ++flagsp, ++data) { for (j = 0; j < t1->h - k; ++j) { opj_t1_dec_sigpass_step_raw( t1, flagsp, - data2, + data + j * l_w, oneplushalf, cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */ j); - data2 += t1->w; } - flagsp ++; } } } @@ -898,59 +891,52 @@ static void opj_t1_dec_refpass_raw( { OPJ_INT32 one, poshalf; OPJ_UINT32 i, j, k; - OPJ_INT32 *data1 = t1->data; + OPJ_INT32 *data = t1->data; opj_flag_t *flagsp = &T1_FLAGS(0, 0); + const OPJ_UINT32 l_w = t1->w; one = 1 << bpno; poshalf = one >> 1; - for (k = 0; k < (t1->h & ~3U); k += 4) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_t1_dec_refpass_step_raw( - t1, - flagsp, - data2, - poshalf, - 0U); - data2 += t1->w; - opj_t1_dec_refpass_step_raw( - t1, - flagsp, - data2, - poshalf, - 1U); - data2 += t1->w; - opj_t1_dec_refpass_step_raw( - t1, - flagsp, - data2, - poshalf, - 2U); - data2 += t1->w; - opj_t1_dec_refpass_step_raw( - t1, - flagsp, - data2, - poshalf, - 3U); - data2 += t1->w; - flagsp ++; - } - data1 += t1->w << 2; - flagsp += 2; - } - if (k < t1->h) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - for (j = k; j < t1->h; ++j) { + for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) { + for (i = 0; i < l_w; ++i, ++flagsp, ++data) { + opj_flag_t flags = *flagsp; + if (flags != 0) { opj_t1_dec_refpass_step_raw( t1, flagsp, - data2, + data, poshalf, - j - k); - data2 += t1->w; + 0U); + opj_t1_dec_refpass_step_raw( + t1, + flagsp, + data + l_w, + 
poshalf, + 1U); + opj_t1_dec_refpass_step_raw( + t1, + flagsp, + data + 2 * l_w, + poshalf, + 2U); + opj_t1_dec_refpass_step_raw( + t1, + flagsp, + data + 3 * l_w, + poshalf, + 3U); + } + } + } + if (k < t1->h) { + for (i = 0; i < l_w; ++i, ++flagsp, ++data) { + for (j = 0; j < t1->h - k; ++j) { + opj_t1_dec_refpass_step_raw( + t1, + flagsp, + data + j * l_w, + poshalf, + j); } - flagsp ++; } } }