Force inlining of mqc decoding and pass steps through heavy use of macros, so as to get better register allocation

This commit is contained in:
Even Rouault 2017-06-02 11:52:16 +02:00
parent 7e8b502842
commit 3d9940a35b
3 changed files with 364 additions and 326 deletions

View File

@ -191,7 +191,7 @@ Decode a symbol
@param mqc MQC handle @param mqc MQC handle
@return Returns the decoded symbol (0 or 1) @return Returns the decoded symbol (0 or 1)
*/ */
static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t * const mqc); /*static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t * const mqc);*/
/* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */
/*@}*/ /*@}*/

View File

@ -38,126 +38,131 @@
#ifndef __MQC_INL_H #ifndef __MQC_INL_H
#define __MQC_INL_H #define __MQC_INL_H
/**
FIXME DOC
@param mqc MQC handle
@return
*/
static INLINE OPJ_UINT32 opj_mqc_mpsexchange(opj_mqc_t *const mqc)
{
OPJ_UINT32 d;
if (mqc->a < (*mqc->curctx)->qeval) {
d = !((*mqc->curctx)->mps);
*mqc->curctx = (*mqc->curctx)->nlps;
} else {
d = (*mqc->curctx)->mps;
*mqc->curctx = (*mqc->curctx)->nmps;
}
return d; /* For internal use of opj_mqc_decode_macro() */
#define opj_mqc_mpsexchange_macro(d, curctx, a) \
{ \
if (a < (*curctx)->qeval) { \
d = !((*curctx)->mps); \
*curctx = (*curctx)->nlps; \
} else { \
d = (*curctx)->mps; \
*curctx = (*curctx)->nmps; \
} \
} }
/** /* For internal use of opj_mqc_decode_macro() */
FIXME DOC #define opj_mqc_lpsexchange_macro(d, curctx, a) \
@param mqc MQC handle { \
@return if (a < (*curctx)->qeval) { \
*/ a = (*curctx)->qeval; \
static INLINE OPJ_UINT32 opj_mqc_lpsexchange(opj_mqc_t *const mqc) d = (*curctx)->mps; \
{ *curctx = (*curctx)->nmps; \
OPJ_UINT32 d; } else { \
if (mqc->a < (*mqc->curctx)->qeval) { a = (*curctx)->qeval; \
mqc->a = (*mqc->curctx)->qeval; d = !((*curctx)->mps); \
d = (*mqc->curctx)->mps; *curctx = (*curctx)->nlps; \
*mqc->curctx = (*mqc->curctx)->nmps; } \
} else {
mqc->a = (*mqc->curctx)->qeval;
d = !((*mqc->curctx)->mps);
*mqc->curctx = (*mqc->curctx)->nlps;
}
return d;
} }
#define opj_mqc_bytein_macro(mqc, c, ct) \
{ \
if (mqc->bp != mqc->end) { \
OPJ_UINT32 l_c; \
if (mqc->bp + 1 != mqc->end) { \
l_c = *(mqc->bp + 1); \
} else { \
l_c = 0xff; \
} \
if (*mqc->bp == 0xff) { \
if (l_c > 0x8f) { \
c += 0xff00; \
ct = 8; \
} else { \
mqc->bp++; \
c += l_c << 9; \
ct = 7; \
} \
} else { \
mqc->bp++; \
c += l_c << 8; \
ct = 8; \
} \
} else { \
c += 0xff00; \
ct = 8; \
} \
}
/* For internal use of opj_mqc_decode_macro() */
#define opj_mqc_renormd_macro(mqc, a, c, ct) \
{ \
do { \
if (ct == 0) { \
opj_mqc_bytein_macro(mqc, c, ct); \
} \
a <<= 1; \
c <<= 1; \
ct--; \
} while (a < 0x8000); \
}
#define opj_mqc_decode_macro(d, mqc, curctx, a, c, ct) \
{ \
/* Implements ISO 15444-1 C.3.2 Decoding a decision (DECODE) */ \
/* Note: alternate "J.2 - Decoding an MPS or an LPS in the */ \
/* software-conventions decoder" has been tried, but does not bring any */ \
/* improvement. See https://github.com/uclouvain/openjpeg/issues/921 */ \
a -= (*curctx)->qeval; \
if ((c >> 16) < (*curctx)->qeval) { \
opj_mqc_lpsexchange_macro(d, curctx, a); \
opj_mqc_renormd_macro(mqc, a, c, ct); \
} else { \
c -= (*curctx)->qeval << 16; \
if ((a & 0x8000) == 0) { \
opj_mqc_mpsexchange_macro(d, curctx, a); \
opj_mqc_renormd_macro(mqc, a, c, ct); \
} else { \
d = (*curctx)->mps; \
} \
} \
}
#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \
register opj_mqc_state_t **curctx = mqc->curctx; \
register OPJ_UINT32 c = mqc->c; \
register OPJ_UINT32 a = mqc->a; \
register OPJ_UINT32 ct = mqc->ct
#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \
mqc->curctx = curctx; \
mqc->c = c; \
mqc->a = a; \
mqc->ct = ct;
/** /**
Input a byte Input a byte
@param mqc MQC handle @param mqc MQC handle
*/ */
static INLINE void opj_mqc_bytein(opj_mqc_t *const mqc) static INLINE void opj_mqc_bytein(opj_mqc_t *const mqc)
{ {
/* Implements ISO 15444-1 C.3.4 Compressed image data input (BYTEIN) */ opj_mqc_bytein_macro(mqc, mqc->c, mqc->ct);
/* Note: alternate "J.3 - Inserting a new byte into the C register in the */
/* software-conventions decoder" has been tried, but does not bring any */
/* improvement. See https://github.com/uclouvain/openjpeg/issues/921 */
if (mqc->bp != mqc->end) {
OPJ_UINT32 c;
if (mqc->bp + 1 != mqc->end) {
c = *(mqc->bp + 1);
} else {
c = 0xff;
}
if (*mqc->bp == 0xff) {
if (c > 0x8f) {
mqc->c += 0xff00;
mqc->ct = 8;
} else {
mqc->bp++;
mqc->c += c << 9;
mqc->ct = 7;
}
} else {
mqc->bp++;
mqc->c += c << 8;
mqc->ct = 8;
}
} else {
mqc->c += 0xff00;
mqc->ct = 8;
}
} }
/** /**
Renormalize mqc->a and mqc->c while decoding Renormalize mqc->a and mqc->c while decoding
@param mqc MQC handle @param mqc MQC handle
*/ */
static INLINE void opj_mqc_renormd(opj_mqc_t *const mqc) #define opj_mqc_renormd(mqc) \
{ opj_mqc_renormd_macro(mqc, mqc->a, mqc->c, mqc->ct)
do {
if (mqc->ct == 0) {
opj_mqc_bytein(mqc);
}
mqc->a <<= 1;
mqc->c <<= 1;
mqc->ct--;
} while (mqc->a < 0x8000);
}
/** /**
Decode a symbol Decode a symbol
@param d OPJ_UINT32 value where to store the decoded symbol
@param mqc MQC handle @param mqc MQC handle
@return Returns the decoded symbol (0 or 1) @return Returns the decoded symbol (0 or 1) in d
*/ */
static INLINE OPJ_UINT32 opj_mqc_decode(opj_mqc_t *const mqc) #define opj_mqc_decode(d, mqc) \
{ opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct)
/* Implements ISO 15444-1 C.3.2 Decoding a decision (DECODE) */
/* Note: alternate "J.2 - Decoding an MPS or an LPS in the */
/* software-conventions decoder" has been tried, but does not bring any */
/* improvement. See https://github.com/uclouvain/openjpeg/issues/921 */
OPJ_UINT32 d;
mqc->a -= (*mqc->curctx)->qeval;
if ((mqc->c >> 16) < (*mqc->curctx)->qeval) {
d = opj_mqc_lpsexchange(mqc);
opj_mqc_renormd(mqc);
} else {
mqc->c -= (*mqc->curctx)->qeval << 16;
if ((mqc->a & 0x8000) == 0) {
d = opj_mqc_mpsexchange(mqc);
opj_mqc_renormd(mqc);
} else {
d = (*mqc->curctx)->mps;
}
}
return d;
}
#endif /* __MQC_INL_H */ #endif /* __MQC_INL_H */

View File

@ -46,6 +46,8 @@
#define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)]) #define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)])
#define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
/** @name Local static functions */ /** @name Local static functions */
/*@{*/ /*@{*/
@ -154,12 +156,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
/** /**
Decode clean-up pass Decode clean-up pass
*/ */
static void opj_t1_dec_clnpass_step_partial(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 row);
static void opj_t1_dec_clnpass_step( static void opj_t1_dec_clnpass_step(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
@ -302,34 +299,39 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
} }
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride ) \
{ \
/* east */ \
flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
\
/* mark target as significant */ \
flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
\
/* west */ \
flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
\
/* north-west, north, north-east */ \
if (ci == 0U) { \
opj_flag_t* north = flagsp - (stride); \
*north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
north[-1] |= T1_SIGMA_17; \
north[1] |= T1_SIGMA_15; \
} \
\
/* south-west, south, south-east */ \
if (ci == 3U) { \
opj_flag_t* south = flagsp + (stride); \
*south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
south[-1] |= T1_SIGMA_2; \
south[1] |= T1_SIGMA_0; \
} \
}
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
OPJ_UINT32 s, OPJ_UINT32 stride) OPJ_UINT32 s, OPJ_UINT32 stride)
{ {
/* east */ opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride);
flagsp[-1] |= T1_SIGMA_5 << (3U * ci);
/* mark target as significant */
*flagsp |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci);
/* west */
flagsp[1] |= T1_SIGMA_3 << (3U * ci);
/* north-west, north, north-east */
if (ci == 0U) {
opj_flag_t* north = flagsp - stride;
*north |= (s << T1_CHI_5_I) | T1_SIGMA_16;
north[-1] |= T1_SIGMA_17;
north[1] |= T1_SIGMA_15;
}
/* south-west, south, south-east */
if (ci == 3U) {
opj_flag_t* south = flagsp + stride;
*south |= (s << T1_CHI_0_I) | T1_SIGMA_1;
south[-1] |= T1_SIGMA_2;
south[1] |= T1_SIGMA_0;
}
} }
/** /**
@ -423,6 +425,32 @@ static INLINE void opj_t1_dec_sigpass_step_raw(
} }
} }
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
data_stride, ci, mqc, curctx, \
v, a, c, ct, oneplushalf) \
{ \
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
(flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
opj_t1_setcurctx(curctx, ctxt1); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
if (v) { \
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
flags, \
flagsp[-1], flagsp[1], \
ci); \
OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
OPJ_UINT32 spb = opj_t1_getspb(lu); \
opj_t1_setcurctx(curctx, ctxt2); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
v = v ^ spb; \
data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride); \
} \
flags |= T1_PI_THIS << (ci * 3U); \
} \
}
static INLINE void opj_t1_dec_sigpass_step_mqc( static INLINE void opj_t1_dec_sigpass_step_mqc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
@ -434,26 +462,9 @@ static INLINE void opj_t1_dec_sigpass_step_mqc(
OPJ_UINT32 v; OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
OPJ_UINT32 const flags = *flagsp; opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
0, ci, mqc, mqc->curctx,
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && v, mqc->a, mqc->c, mqc->ct, oneplushalf);
(flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
*flagsp,
flagsp[-1], flagsp[1],
ci);
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
opj_mqc_setcurctx(mqc, ctxt1);
if (opj_mqc_decode(mqc)) {
OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
OPJ_UINT32 spb = opj_t1_getspb(lu);
opj_mqc_setcurctx(mqc, ctxt2);
v = opj_mqc_decode(mqc) ^ spb;
*datap = v ? -oneplushalf : oneplushalf;
opj_t1_update_flags(flagsp, ci, v, flags_stride);
}
*flagsp |= T1_PI_THIS << (ci * 3U);
}
} }
static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( static INLINE void opj_t1_dec_sigpass_step_mqc_vsc(
@ -480,11 +491,13 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc(
ci); ci);
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
opj_mqc_setcurctx(mqc, ctxt1); opj_mqc_setcurctx(mqc, ctxt1);
if (opj_mqc_decode(mqc)) { opj_mqc_decode(v, mqc);
if (v) {
OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
OPJ_UINT32 spb = opj_t1_getspb(lu); OPJ_UINT32 spb = opj_t1_getspb(lu);
opj_mqc_setcurctx(mqc, ctxt2); opj_mqc_setcurctx(mqc, ctxt2);
v = opj_mqc_decode(mqc) ^ spb; opj_mqc_decode(v, mqc);
v = v ^ spb;
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_update_flags(flagsp, ci, v, t1->w + 2); opj_t1_update_flags(flagsp, ci, v, t1->w + 2);
} }
@ -691,36 +704,43 @@ static void opj_t1_dec_sigpass_raw(
{ \ { \
OPJ_INT32 one, half, oneplushalf; \ OPJ_INT32 one, half, oneplushalf; \
OPJ_UINT32 i, j, k; \ OPJ_UINT32 i, j, k; \
OPJ_INT32 *data1 = t1->data; \ register OPJ_INT32 *data = t1->data; \
opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \ one = 1 << bpno; \
half = one >> 1; \ half = one >> 1; \
oneplushalf = one | half; \ oneplushalf = one | half; \
for (k = 0; k < (h & ~3u); k += 4) { \ for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
for (i = 0; i < w; ++i) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
OPJ_INT32 *data2 = data1 + i; \ opj_flag_t flags = *flagsp; \
if( *flagsp != 0 ) { \ if( flags != 0 ) { \
opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 0U, flags_stride); \ opj_t1_dec_sigpass_step_mqc_macro( \
data2 += w; \ flags, flagsp, flags_stride, data, \
opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 1U, flags_stride); \ l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf); \
data2 += w; \ opj_t1_dec_sigpass_step_mqc_macro( \
opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 2U, flags_stride); \ flags, flagsp, flags_stride, data, \
data2 += w; \ l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf); \
opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, 3U, flags_stride); \ opj_t1_dec_sigpass_step_mqc_macro( \
data2 += w; \ flags, flagsp, flags_stride, data, \
l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf); \
opj_t1_dec_sigpass_step_mqc_macro( \
flags, flagsp, flags_stride, data, \
l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf); \
*flagsp = flags; \
} \ } \
flagsp ++; \
} \ } \
data1 += w << 2; \
flagsp += 2; \
} \ } \
for (i = 0; i < w; ++i) { \ UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
OPJ_INT32 *data2 = data1 + i; \ if( k < h ) { \
for (j = k; j < h; ++j) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
opj_t1_dec_sigpass_step_mqc(t1, flagsp, data2, oneplushalf, j - k, flags_stride); \ for (j = 0; j < h - k; ++j) { \
data2 += w; \ opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
data + j * l_w, oneplushalf, j, flags_stride); \
} \ } \
flagsp ++; \ } \
} \ } \
} }
@ -844,6 +864,19 @@ static INLINE void opj_t1_dec_refpass_step_raw(
} }
} }
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
mqc, curctx, v, a, c, ct, poshalf) \
{ \
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
(T1_SIGMA_THIS << (ci * 3U))) { \
OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
opj_t1_setcurctx(curctx, ctxt); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
flags |= T1_MU_THIS << (ci * 3U); \
} \
}
static INLINE void opj_t1_dec_refpass_step_mqc( static INLINE void opj_t1_dec_refpass_step_mqc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
@ -854,15 +887,9 @@ static INLINE void opj_t1_dec_refpass_step_mqc(
OPJ_UINT32 v; OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == mqc, mqc->curctx, v, mqc->a, mqc->c,
(T1_SIGMA_THIS << (ci * 3U))) { mqc->ct, poshalf);
OPJ_UINT32 ctxt = opj_t1_getctxno_mag(*flagsp >> (ci * 3U));
opj_mqc_setcurctx(mqc, ctxt);
v = opj_mqc_decode(mqc);
*datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
*flagsp |= T1_MU_THIS << (ci * 3U);
}
} }
static INLINE void opj_t1_dec_refpass_step_mqc_vsc( static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
@ -887,7 +914,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
opj_mqc_setcurctx(mqc, ctxt); opj_mqc_setcurctx(mqc, ctxt);
v = opj_mqc_decode(mqc); opj_mqc_decode(v, mqc);
*datap += (v ^ (*datap < 0)) ? poshalf : -poshalf; *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
*flagsp |= T1_MU_THIS << (ci * 3U); *flagsp |= T1_MU_THIS << (ci * 3U);
} }
@ -1069,35 +1096,41 @@ static void opj_t1_dec_refpass_raw(
{ \ { \
OPJ_INT32 one, poshalf; \ OPJ_INT32 one, poshalf; \
OPJ_UINT32 i, j, k; \ OPJ_UINT32 i, j, k; \
OPJ_INT32 *data1 = t1->data; \ register OPJ_INT32 *data = t1->data; \
opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \ one = 1 << bpno; \
poshalf = one >> 1; \ poshalf = one >> 1; \
for (k = 0; k < (h & ~3u); k += 4) { \ for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
for (i = 0; i < w; ++i) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
OPJ_INT32 *data2 = data1 + i; \ opj_flag_t flags = *flagsp; \
if( *flagsp != 0 ) { \ if( flags != 0 ) { \
opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 0U); \ opj_t1_dec_refpass_step_mqc_macro( \
data2 += w; \ flags, data, l_w, 0, \
opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 1U); \ mqc, curctx, v, a, c, ct, poshalf); \
data2 += w; \ opj_t1_dec_refpass_step_mqc_macro( \
opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 2U); \ flags, data, l_w, 1, \
data2 += w; \ mqc, curctx, v, a, c, ct, poshalf); \
opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, 3U); \ opj_t1_dec_refpass_step_mqc_macro( \
data2 += w; \ flags, data, l_w, 2, \
mqc, curctx, v, a, c, ct, poshalf); \
opj_t1_dec_refpass_step_mqc_macro( \
flags, data, l_w, 3, \
mqc, curctx, v, a, c, ct, poshalf); \
*flagsp = flags; \
} \ } \
flagsp ++; \
} \ } \
data1 += w << 2; \
flagsp += 2; \
} \ } \
for (i = 0; i < w; ++i) { \ UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
OPJ_INT32 *data2 = data1 + i; \ if( k < h ) { \
for (j = k; j < h; ++j) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
opj_t1_dec_refpass_step_mqc(t1, flagsp, data2, poshalf, j - k); \ for (j = 0; j < h - k; ++j) { \
data2 += w; \ opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
} \ } \
flagsp ++; \ } \
} \ } \
} }
@ -1252,25 +1285,32 @@ LABEL_PARTIAL:
} }
} }
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
static void opj_t1_dec_clnpass_step_partial( flags, flagsp, flags_stride, data, \
opj_t1_t *t1, data_stride, ci, mqc, curctx, \
opj_flag_t *flagsp, v, a, c, ct, oneplushalf) \
OPJ_INT32 *datap, { \
OPJ_INT32 oneplushalf, if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
OPJ_UINT32 ci) do { \
{ if( !partial ) { \
OPJ_UINT32 v; OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ opj_t1_setcurctx(curctx, ctxt1); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( if( !v ) \
*flagsp, flagsp[-1], flagsp[1], break; \
ci); } \
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); { \
v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
*datap = v ? -oneplushalf : oneplushalf; flags, flagsp[-1], flagsp[1], \
opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); ci); \
/* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */ opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
v = v ^ opj_t1_getspb(lu); \
data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride); \
} \
} while(0); \
} \
} }
static void opj_t1_dec_clnpass_step( static void opj_t1_dec_clnpass_step(
@ -1283,45 +1323,10 @@ static void opj_t1_dec_clnpass_step(
OPJ_UINT32 v; OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); *flagsp, flagsp, t1->w + 2U, datap,
opj_mqc_setcurctx(mqc, ctxt1); 0, ci, mqc, mqc->curctx,
if (opj_mqc_decode(mqc)) { v, mqc->a, mqc->c, mqc->ct, oneplushalf);
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
*flagsp, flagsp[-1], flagsp[1],
ci);
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu);
*datap = v ? -oneplushalf : oneplushalf;
opj_t1_update_flags(flagsp, ci, v, t1->w + 2U);
}
}
/* *flagsp &= ~(T1_PI_THIS << (3U * ci)); */
}
static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 ci,
OPJ_UINT32 flags_stride)
{
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U));
opj_mqc_setcurctx(mqc, ctxt1);
if (opj_mqc_decode(mqc)) {
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
*flagsp, flagsp[-1], flagsp[1],
ci);
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu);
*datap = v ? -oneplushalf : oneplushalf;
opj_t1_update_flags(flagsp, ci, v, flags_stride);
}
} }
static void opj_t1_dec_clnpass_step_vsc( static void opj_t1_dec_clnpass_step_vsc(
@ -1347,7 +1352,8 @@ static void opj_t1_dec_clnpass_step_vsc(
if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
opj_mqc_setcurctx(mqc, ctxt1); opj_mqc_setcurctx(mqc, ctxt1);
if (opj_mqc_decode(mqc)) { opj_mqc_decode(v, mqc);
if (v) {
OPJ_UINT32 lu; OPJ_UINT32 lu;
LABEL_PARTIAL: LABEL_PARTIAL:
lu = opj_t1_getctxtno_sc_or_spb_index( lu = opj_t1_getctxtno_sc_or_spb_index(
@ -1355,7 +1361,8 @@ LABEL_PARTIAL:
flagsp[1] & vsc_mask, flagsp[1] & vsc_mask,
ci); ci);
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(lu));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb(lu); opj_mqc_decode(v, mqc);
v = v ^ opj_t1_getspb(lu);
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_update_flags(flagsp, ci, v, t1->w + 2U); opj_t1_update_flags(flagsp, ci, v, t1->w + 2U);
} }
@ -1454,12 +1461,11 @@ static void opj_t1_enc_clnpass(
#define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \ #define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \
{ \ { \
OPJ_INT32 one, half, oneplushalf, agg, vsc; \ OPJ_INT32 one, half, oneplushalf, agg, vsc; \
OPJ_UINT32 runlen; \ OPJ_UINT32 runlen, v; \
OPJ_UINT32 i, j, k; \ OPJ_UINT32 i, j, k; \
OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \ OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \
\ const OPJ_UINT32 l_w = w; \
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ \ opj_mqc_t* mqc = &(t1->mqc); \
\
one = 1 << bpno; \ one = 1 << bpno; \
half = one >> 1; \ half = one >> 1; \
oneplushalf = one | half; \ oneplushalf = one | half; \
@ -1476,12 +1482,14 @@ static void opj_t1_enc_clnpass(
} \ } \
if (agg) { \ if (agg) { \
opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \
if (!opj_mqc_decode(mqc)) { \ opj_mqc_decode(v, mqc); \
if (!v) { \
continue; \ continue; \
} \ } \
opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \
runlen = opj_mqc_decode(mqc); \ opj_mqc_decode(runlen, mqc); \
runlen = (runlen << 1) | opj_mqc_decode(mqc); \ opj_mqc_decode(v, mqc); \
runlen = (runlen << 1) | v; \
} else { \ } else { \
runlen = 0; \ runlen = 0; \
} \ } \
@ -1500,70 +1508,95 @@ static void opj_t1_enc_clnpass(
flagsp1 += flags_stride; \ flagsp1 += flags_stride; \
} \ } \
} else { \ } else { \
OPJ_INT32 *data1 = t1->data; \ DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
opj_flag_t *flagsp1 = &t1->flags[flags_stride + 1]; \ register OPJ_UINT32 v; \
for (k = 0; k < (h & ~3u); k += 4) { \ register OPJ_INT32 *data = t1->data; \
for (i = 0; i < w; ++i) { \ register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
OPJ_INT32 *data2 = data1 + i; \ for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
opj_flag_t *flagsp2 = flagsp1 + i; \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
if (*flagsp2 == 0) { \ opj_flag_t flags = *flagsp; \
opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ if (flags == 0) { \
if (!opj_mqc_decode(mqc)) { \ OPJ_UINT32 partial = OPJ_TRUE; \
opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
if (!v) { \
continue; \ continue; \
} \ } \
opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
runlen = opj_mqc_decode(mqc); \ opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
runlen = (runlen << 1) | opj_mqc_decode(mqc); \ opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
data2 += runlen * w; \ runlen = (runlen << 1) | v; \
for (j = runlen; j < 4; ++j) { \ switch(runlen) { \
if (j == runlen) { \ case 0: \
opj_t1_dec_clnpass_step_partial(t1, flagsp2, data2, oneplushalf, j); \ opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
} else { \ flags, flagsp, flags_stride, data, \
opj_t1_dec_clnpass_step(t1, flagsp2, data2, oneplushalf, j); \ l_w, 0, mqc, curctx, \
} \ v, a, c, ct, oneplushalf); \
data2 += w; \ partial = OPJ_FALSE; \
/* fallthrough */ \
case 1: \
opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
flags, flagsp, flags_stride, data, \
l_w, 1, mqc, curctx, \
v, a, c, ct, oneplushalf); \
partial = OPJ_FALSE; \
/* fallthrough */ \
case 2: \
opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
flags, flagsp, flags_stride, data, \
l_w, 2, mqc, curctx, \
v, a, c, ct, oneplushalf); \
partial = OPJ_FALSE; \
/* fallthrough */ \
case 3: \
opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
flags, flagsp, flags_stride, data, \
l_w, 3, mqc, curctx, \
v, a, c, ct, oneplushalf); \
break; \
} \ } \
} else { \ } else { \
if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (0 * 3U)))) {\ opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 0U, flags_stride); \ flags, flagsp, flags_stride, data, \
} \ l_w, 0, mqc, curctx, \
data2 += w; \ v, a, c, ct, oneplushalf); \
if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (1 * 3U)))) {\ opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 1U, flags_stride); \ flags, flagsp, flags_stride, data, \
} \ l_w, 1, mqc, curctx, \
data2 += w; \ v, a, c, ct, oneplushalf); \
if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (2 * 3U)))) {\ opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 2U, flags_stride); \ flags, flagsp, flags_stride, data, \
} \ l_w, 2, mqc, curctx, \
data2 += w; \ v, a, c, ct, oneplushalf); \
if (!(*flagsp2 & ((T1_SIGMA_THIS | T1_PI_THIS) << (3 * 3U)))) {\ opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flagsp2, data2, oneplushalf, 3U, flags_stride); \ flags, flagsp, flags_stride, data, \
} \ l_w, 3, mqc, curctx, \
data2 += w; \ v, a, c, ct, oneplushalf); \
} \ } \
*flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
} \ } \
data1 += w << 2; \
flagsp1 += flags_stride; \
} \ } \
for (i = 0; i < w; ++i) { \ UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
OPJ_INT32 *data2 = data1 + i; \ if( k < h ) { \
opj_flag_t *flagsp2 = flagsp1 + i; \ for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
for (j = k; j < h; ++j) { \ for (j = 0; j < h - k; ++j) { \
opj_t1_dec_clnpass_step(t1, flagsp2, data2, oneplushalf, j - k); \ opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j); \
data2 += w; \ } \
*flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
} \ } \
*flagsp2 &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
} \ } \
} \ } \
\ \
if (segsym) { \ if (segsym) { \
OPJ_UINT32 v = 0; \ OPJ_UINT32 v, v2; \
opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \
v = opj_mqc_decode(mqc); \ opj_mqc_decode(v, mqc); \
v = (v << 1) | opj_mqc_decode(mqc); \ opj_mqc_decode(v2, mqc); \
v = (v << 1) | opj_mqc_decode(mqc); \ v = (v << 1) | v2; \
v = (v << 1) | opj_mqc_decode(mqc); \ opj_mqc_decode(v2, mqc); \
v = (v << 1) | v2; \
opj_mqc_decode(v2, mqc); \
v = (v << 1) | v2; \
/* \ /* \
if (v!=0xa) { \ if (v!=0xa) { \
opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); \ opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); \