Tier 1 decoding: add a colflags array

Add an additional flag array, colflags, laid out such that colflags[1+0] holds the state of col=0, rows 0..3,
colflags[1+1] that of col=1, rows 0..3, colflags[1+flags_stride] that of col=0, rows 4..7, and so on.
This array avoids excessive cache thrashing when processing 4 vertical samples at a time,
as is done in the various decoding steps.
This commit is contained in:
Even Rouault 2016-05-23 01:33:06 +02:00
parent 31882ad7f4
commit 1da397e94a
2 changed files with 327 additions and 109 deletions

View File

@ -39,6 +39,8 @@
#include "opj_includes.h" #include "opj_includes.h"
#include "t1_luts.h" #include "t1_luts.h"
/* #define CONSISTENCY_CHECK */
/** @defgroup T1 T1 - Implementation of the tier-1 coding */ /** @defgroup T1 T1 - Implementation of the tier-1 coding */
/*@{*/ /*@{*/
@ -51,7 +53,7 @@ static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f);
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride); static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride);
/** /**
Encode significant pass Encode significant pass
*/ */
@ -80,20 +82,26 @@ static void opj_t1_dec_sigpass_step(opj_t1_t *t1,
static INLINE void opj_t1_dec_sigpass_step_raw( static INLINE void opj_t1_dec_sigpass_step_raw(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t* colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf, OPJ_INT32 oneplushalf,
OPJ_INT32 vsc); OPJ_INT32 vsc,
OPJ_INT32 row);
static INLINE void opj_t1_dec_sigpass_step_mqc( static INLINE void opj_t1_dec_sigpass_step_mqc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t* colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf); OPJ_INT32 oneplushalf,
OPJ_INT32 row);
static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( static INLINE void opj_t1_dec_sigpass_step_mqc_vsc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t* colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf, OPJ_INT32 oneplushalf,
OPJ_INT32 vsc); OPJ_INT32 vsc,
OPJ_INT32 row);
/** /**
@ -168,23 +176,28 @@ static void opj_t1_dec_refpass_step(opj_t1_t *t1,
static INLINE void opj_t1_dec_refpass_step_raw( static INLINE void opj_t1_dec_refpass_step_raw(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 poshalf, OPJ_INT32 poshalf,
OPJ_INT32 neghalf, OPJ_INT32 neghalf,
OPJ_INT32 vsc); OPJ_INT32 row);
static INLINE void opj_t1_dec_refpass_step_mqc( static INLINE void opj_t1_dec_refpass_step_mqc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
OPJ_INT32 *datap, opj_colflag_t *colflagsp,
OPJ_INT32 poshalf,
OPJ_INT32 neghalf);
static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 poshalf, OPJ_INT32 poshalf,
OPJ_INT32 neghalf, OPJ_INT32 neghalf,
OPJ_INT32 vsc); OPJ_INT32 row);
static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
opj_t1_t *t1,
opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap,
OPJ_INT32 poshalf,
OPJ_INT32 neghalf,
OPJ_INT32 vsc,
OPJ_INT32 row);
@ -206,20 +219,26 @@ Decode clean-up pass
static void opj_t1_dec_clnpass_step_partial( static void opj_t1_dec_clnpass_step_partial(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf); OPJ_INT32 oneplushalf,
OPJ_INT32 row);
static void opj_t1_dec_clnpass_step( static void opj_t1_dec_clnpass_step(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf); OPJ_INT32 oneplushalf,
OPJ_INT32 row);
static void opj_t1_dec_clnpass_step_vsc( static void opj_t1_dec_clnpass_step_vsc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf, OPJ_INT32 oneplushalf,
OPJ_INT32 partial, OPJ_INT32 partial,
OPJ_INT32 vsc); OPJ_INT32 vsc,
OPJ_INT32 row);
/** /**
Encode clean-up pass Encode clean-up pass
*/ */
@ -319,7 +338,7 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) {
return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
} }
static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride) { static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride) {
opj_flag_t *np = flagsp - stride; opj_flag_t *np = flagsp - stride;
opj_flag_t *sp = flagsp + stride; opj_flag_t *sp = flagsp + stride;
@ -342,6 +361,47 @@ static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stri
sp[1] |= T1_SIG_NW; sp[1] |= T1_SIG_NW;
} }
/**
 * Record that a sample has just become significant, in both flag arrays.
 *
 * The per-sample flags are refreshed through opj_t1_updateflags(); the
 * per-column flags (one opj_colflag_t per column of a 4-row stripe, each row
 * owning a group of T1_COLFLAG_RBS bits) are then updated as follows:
 *  - the sample's own entry gets the "significant" bit for its row;
 *  - the entries of the left and right columns get the "other significant"
 *    bit for that same row;
 *  - all three entries get the "other significant" bit for the vertically
 *    adjacent rows that live in the same stripe;
 *  - when the sample sits on the first (resp. last) row of the stripe, the
 *    three entries one stride before (resp. after) get the "other
 *    significant" bit for their row 3 (resp. row 0).
 *
 * @param flagsp     per-sample flags of the sample
 * @param colflagsp  column flags entry holding the sample
 * @param s          forwarded unchanged to opj_t1_updateflags()
 * @param stride     element distance between consecutive rows of flags and
 *                   between consecutive stripes of column flags
 * @param row        row of the sample inside its stripe (callers pass 0..3)
 */
static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, opj_colflag_t *colflagsp, OPJ_UINT32 s, OPJ_UINT32 stride, OPJ_INT32 row)
{
    /* Bits belonging to the sample's own row. */
    const opj_colflag_t sig_this_row =
        (opj_colflag_t)(T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row));
    const opj_colflag_t other_this_row =
        (opj_colflag_t)(T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row));
    /* "Other significant" bits of the adjacent rows stored in this stripe. */
    opj_colflag_t other_adjacent_rows = 0;

    opj_t1_updateflags(flagsp, s, stride);

    /* Row above, unless it belongs to the previous stripe. */
    if (row != 0) {
        other_adjacent_rows |=
            (opj_colflag_t)(T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1)));
    }
    /* Row below, unless it belongs to the next stripe. */
    if (row != 3) {
        other_adjacent_rows |=
            (opj_colflag_t)(T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1)));
    }

    colflagsp[-1] |= other_this_row | other_adjacent_rows;
    colflagsp[0]  |= sig_this_row   | other_adjacent_rows;
    colflagsp[1]  |= other_this_row | other_adjacent_rows;

    if (row == 0) {
        /* Neighbours above are row 3 of the entries one stride earlier. */
        opj_colflag_t *above = colflagsp - stride;
        above[-1] |= (T1_COLFLAG_SIG_OTHER_ROW_3);
        above[0]  |= (T1_COLFLAG_SIG_OTHER_ROW_3);
        above[1]  |= (T1_COLFLAG_SIG_OTHER_ROW_3);
    } else if (row == 3) {
        /* Neighbours below are row 0 of the entries one stride later. */
        opj_colflag_t *below = colflagsp + stride;
        below[-1] |= (T1_COLFLAG_SIG_OTHER_ROW_0);
        below[0]  |= (T1_COLFLAG_SIG_OTHER_ROW_0);
        below[1]  |= (T1_COLFLAG_SIG_OTHER_ROW_0);
    }
}
static void opj_t1_enc_sigpass_step( opj_t1_t *t1, static void opj_t1_enc_sigpass_step( opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
@ -385,68 +445,88 @@ static void opj_t1_enc_sigpass_step( opj_t1_t *t1,
static INLINE void opj_t1_dec_sigpass_step_raw( static INLINE void opj_t1_dec_sigpass_step_raw(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t* colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf, OPJ_INT32 oneplushalf,
OPJ_INT32 vsc) OPJ_INT32 vsc,
OPJ_INT32 row)
{ {
OPJ_INT32 v, flag; OPJ_INT32 v, flag;
opj_raw_t *raw = t1->raw; /* RAW component */ opj_raw_t *raw = t1->raw; /* RAW component */
flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp);
if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { if ((flag & T1_SIG_OTH) && !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) {
if (opj_raw_decode(raw)) { if (opj_raw_decode(raw)) {
v = (OPJ_INT32)opj_raw_decode(raw); /* ESSAI */ v = (OPJ_INT32)opj_raw_decode(raw); /* ESSAI */
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row);
} }
#ifdef CONSISTENCY_CHECK
*flagsp |= T1_VISIT; *flagsp |= T1_VISIT;
#endif
*colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row));
} }
} }
static INLINE void opj_t1_dec_sigpass_step_mqc( static INLINE void opj_t1_dec_sigpass_step_mqc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t* colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf) OPJ_INT32 oneplushalf,
OPJ_INT32 row)
{ {
OPJ_INT32 v, flag; OPJ_INT32 v, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */
#ifdef CONSISTENCY_CHECK
flag = *flagsp; assert( ((*flagsp & T1_SIG_OTH) && !(*flagsp & (T1_SIG | T1_VISIT))) ==
if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) ==
(T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row))) );
#endif
if( (*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) ==
(T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) ) {
flag = *flagsp;
opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag));
if (opj_mqc_decode(mqc)) { if (opj_mqc_decode(mqc)) {
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row);
} }
#ifdef CONSISTENCY_CHECK
*flagsp |= T1_VISIT; *flagsp |= T1_VISIT;
#endif
*colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row));
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( static INLINE void opj_t1_dec_sigpass_step_mqc_vsc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t* colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf, OPJ_INT32 oneplushalf,
OPJ_INT32 vsc) OPJ_INT32 vsc,
OPJ_INT32 row)
{ {
OPJ_INT32 v, flag; OPJ_INT32 v, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */
flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp);
if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { if ((flag & T1_SIG_OTH) && !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) {
opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag));
if (opj_mqc_decode(mqc)) { if (opj_mqc_decode(mqc)) {
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row);
} }
#ifdef CONSISTENCY_CHECK
*flagsp |= T1_VISIT; *flagsp |= T1_VISIT;
#endif
*colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row));
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
@ -489,21 +569,26 @@ static void opj_t1_dec_sigpass_raw(
{ {
OPJ_INT32 one, half, oneplushalf, vsc; OPJ_INT32 one, half, oneplushalf, vsc;
OPJ_UINT32 i, j, k; OPJ_UINT32 i, j, k;
opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
one = 1 << bpno; one = 1 << bpno;
half = one >> 1; half = one >> 1;
oneplushalf = one | half; oneplushalf = one | half;
for (k = 0; k < t1->h; k += 4) { for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i) {
opj_colflag_t *colflags2 = colflags1 + i;
for (j = k; j < k + 4 && j < t1->h; ++j) { for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0;
opj_t1_dec_sigpass_step_raw( opj_t1_dec_sigpass_step_raw(
t1, t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->flags[((j+1) * t1->flags_stride) + i + 1],
colflags2,
&t1->data[(j * t1->w) + i], &t1->data[(j * t1->w) + i],
oneplushalf, oneplushalf,
vsc); vsc,
j - k);
} }
} }
colflags1 += t1->flags_stride;
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
@ -513,6 +598,7 @@ static void opj_t1_dec_sigpass_raw(
OPJ_UINT32 i, j, k; \ OPJ_UINT32 i, j, k; \
OPJ_INT32 *data1 = t1->data; \ OPJ_INT32 *data1 = t1->data; \
opj_flag_t *flags1 = &t1->flags[1]; \ opj_flag_t *flags1 = &t1->flags[1]; \
opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
one = 1 << bpno; \ one = 1 << bpno; \
half = one >> 1; \ half = one >> 1; \
oneplushalf = one | half; \ oneplushalf = one | half; \
@ -520,28 +606,32 @@ static void opj_t1_dec_sigpass_raw(
for (i = 0; i < w; ++i) { \ for (i = 0; i < w; ++i) { \
OPJ_INT32 *data2 = data1 + i; \ OPJ_INT32 *data2 = data1 + i; \
opj_flag_t *flags2 = flags1 + i; \ opj_flag_t *flags2 = flags1 + i; \
opj_colflag_t *colflags2 = colflags1 + i; \
if( *colflags2 == 0 ) continue; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0); \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1); \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2); \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3); \
data2 += w; \ data2 += w; \
} \ } \
data1 += w << 2; \ data1 += w << 2; \
flags1 += flags_stride << 2; \ flags1 += flags_stride << 2; \
colflags1 += flags_stride; \
} \ } \
for (i = 0; i < w; ++i) { \ for (i = 0; i < w; ++i) { \
OPJ_INT32 *data2 = data1 + i; \ OPJ_INT32 *data2 = data1 + i; \
opj_flag_t *flags2 = flags1 + i; \ opj_flag_t *flags2 = flags1 + i; \
opj_colflag_t *colflags2 = colflags1 + i; \
for (j = k; j < h; ++j) { \ for (j = k; j < h; ++j) { \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, j - k); \
data2 += w; \ data2 += w; \
} \ } \
} \ } \
@ -568,21 +658,26 @@ static void opj_t1_dec_sigpass_mqc_vsc(
{ {
OPJ_INT32 one, half, oneplushalf, vsc; OPJ_INT32 one, half, oneplushalf, vsc;
OPJ_UINT32 i, j, k; OPJ_UINT32 i, j, k;
opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
one = 1 << bpno; one = 1 << bpno;
half = one >> 1; half = one >> 1;
oneplushalf = one | half; oneplushalf = one | half;
for (k = 0; k < t1->h; k += 4) { for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i) {
opj_colflag_t *colflags2 = colflags1 + i;
for (j = k; j < k + 4 && j < t1->h; ++j) { for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0; vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0;
opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_dec_sigpass_step_mqc_vsc(
t1, t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->flags[((j+1) * t1->flags_stride) + i + 1],
colflags2,
&t1->data[(j * t1->w) + i], &t1->data[(j * t1->w) + i],
oneplushalf, oneplushalf,
vsc); vsc,
j - k);
} }
} }
colflags1 += t1->flags_stride;
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
@ -619,64 +714,81 @@ static void opj_t1_enc_refpass_step( opj_t1_t *t1,
static INLINE void opj_t1_dec_refpass_step_raw( static INLINE void opj_t1_dec_refpass_step_raw(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 poshalf, OPJ_INT32 poshalf,
OPJ_INT32 neghalf, OPJ_INT32 neghalf,
OPJ_INT32 vsc) OPJ_INT32 row)
{ {
OPJ_INT32 v, t, flag; OPJ_INT32 v, t;
opj_raw_t *raw = t1->raw; /* RAW component */ opj_raw_t *raw = t1->raw; /* RAW component */
flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) ==
if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) {
v = (OPJ_INT32)opj_raw_decode(raw); v = (OPJ_INT32)opj_raw_decode(raw);
t = v ? poshalf : neghalf; t = v ? poshalf : neghalf;
*datap += *datap < 0 ? -t : t; *datap += *datap < 0 ? -t : t;
*flagsp |= T1_REFINE; *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row));
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
static INLINE void opj_t1_dec_refpass_step_mqc( static INLINE void opj_t1_dec_refpass_step_mqc(
opj_t1_t *t1, opj_t1_t *t1,
#ifdef CONSISTENCY_CHECK
opj_flag_t *flagsp, opj_flag_t *flagsp,
#else
opj_flag_t *flagsp_unused,
#endif
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 poshalf, OPJ_INT32 poshalf,
OPJ_INT32 neghalf) OPJ_INT32 neghalf,
OPJ_INT32 row)
{ {
OPJ_INT32 v, t, flag; OPJ_INT32 v, t;
opj_mqc_t *mqc = t1->mqc; /* MQC component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */
#ifdef CONSISTENCY_CHECK
flag = *flagsp; assert( ((*flagsp & (T1_SIG | T1_VISIT)) == T1_SIG) ==
if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) );
opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag((OPJ_UINT32)flag)); /* ESSAI */ #endif
if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) ==
((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) {
OPJ_UINT32 tmp1 = (*colflagsp & (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2 : tmp1;
opj_mqc_setcurctx(mqc, tmp2); /* ESSAI */
v = opj_mqc_decode(mqc); v = opj_mqc_decode(mqc);
t = v ? poshalf : neghalf; t = v ? poshalf : neghalf;
*datap += *datap < 0 ? -t : t; *datap += *datap < 0 ? -t : t;
*flagsp |= T1_REFINE; *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row));
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
static INLINE void opj_t1_dec_refpass_step_mqc_vsc( static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 poshalf, OPJ_INT32 poshalf,
OPJ_INT32 neghalf, OPJ_INT32 neghalf,
OPJ_INT32 vsc) OPJ_INT32 vsc,
OPJ_INT32 row)
{ {
OPJ_INT32 v, t, flag; OPJ_INT32 v, t, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */
flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) ==
if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) {
opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag((OPJ_UINT32)flag)); /* ESSAI */ OPJ_INT32 flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp);
OPJ_UINT32 tmp1 = (flag & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2 : tmp1;
opj_mqc_setcurctx(mqc, tmp2); /* ESSAI */
v = opj_mqc_decode(mqc); v = opj_mqc_decode(mqc);
t = v ? poshalf : neghalf; t = v ? poshalf : neghalf;
*datap += *datap < 0 ? -t : t; *datap += *datap < 0 ? -t : t;
*flagsp |= T1_REFINE; *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row));
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
@ -719,22 +831,24 @@ static void opj_t1_dec_refpass_raw(
OPJ_INT32 one, poshalf, neghalf; OPJ_INT32 one, poshalf, neghalf;
OPJ_UINT32 i, j, k; OPJ_UINT32 i, j, k;
OPJ_INT32 vsc; OPJ_INT32 vsc;
opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
one = 1 << bpno; one = 1 << bpno;
poshalf = one >> 1; poshalf = one >> 1;
neghalf = bpno > 0 ? -poshalf : -1; neghalf = bpno > 0 ? -poshalf : -1;
for (k = 0; k < t1->h; k += 4) { for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i) {
opj_colflag_t *colflags2 = colflags1 + i;
for (j = k; j < k + 4 && j < t1->h; ++j) { for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0;
opj_t1_dec_refpass_step_raw( opj_t1_dec_refpass_step_raw(
t1, t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->flags[((j+1) * t1->flags_stride) + i + 1],
colflags2,
&t1->data[(j * t1->w) + i], &t1->data[(j * t1->w) + i],
poshalf, poshalf,
neghalf, neghalf, j - k);
vsc);
} }
} }
colflags1 += t1->flags_stride;
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
@ -744,6 +858,7 @@ static void opj_t1_dec_refpass_raw(
OPJ_UINT32 i, j, k; \ OPJ_UINT32 i, j, k; \
OPJ_INT32 *data1 = t1->data; \ OPJ_INT32 *data1 = t1->data; \
opj_flag_t *flags1 = &t1->flags[1]; \ opj_flag_t *flags1 = &t1->flags[1]; \
opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
one = 1 << bpno; \ one = 1 << bpno; \
poshalf = one >> 1; \ poshalf = one >> 1; \
neghalf = bpno > 0 ? -poshalf : -1; \ neghalf = bpno > 0 ? -poshalf : -1; \
@ -751,28 +866,32 @@ static void opj_t1_dec_refpass_raw(
for (i = 0; i < w; ++i) { \ for (i = 0; i < w; ++i) { \
OPJ_INT32 *data2 = data1 + i; \ OPJ_INT32 *data2 = data1 + i; \
opj_flag_t *flags2 = flags1 + i; \ opj_flag_t *flags2 = flags1 + i; \
opj_colflag_t *colflags2 = colflags1 + i; \
if( *colflags2 == 0 ) continue; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 0); \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 1); \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 2); \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 3); \
data2 += w; \ data2 += w; \
} \ } \
data1 += w << 2; \ data1 += w << 2; \
flags1 += flags_stride << 2; \ flags1 += flags_stride << 2; \
colflags1 += flags_stride; \
} \ } \
for (i = 0; i < w; ++i) { \ for (i = 0; i < w; ++i) { \
OPJ_INT32 *data2 = data1 + i; \ OPJ_INT32 *data2 = data1 + i; \
opj_flag_t *flags2 = flags1 + i; \ opj_flag_t *flags2 = flags1 + i; \
opj_colflag_t *colflags2 = colflags1 + i; \
for (j = k; j < h; ++j) { \ for (j = k; j < h; ++j) { \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, j - k); \
data2 += w; \ data2 += w; \
} \ } \
} \ } \
@ -800,22 +919,26 @@ static void opj_t1_dec_refpass_mqc_vsc(
OPJ_INT32 one, poshalf, neghalf; OPJ_INT32 one, poshalf, neghalf;
OPJ_UINT32 i, j, k; OPJ_UINT32 i, j, k;
OPJ_INT32 vsc; OPJ_INT32 vsc;
opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
one = 1 << bpno; one = 1 << bpno;
poshalf = one >> 1; poshalf = one >> 1;
neghalf = bpno > 0 ? -poshalf : -1; neghalf = bpno > 0 ? -poshalf : -1;
for (k = 0; k < t1->h; k += 4) { for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i) {
opj_colflag_t *colflags2 = colflags1 + i;
for (j = k; j < k + 4 && j < t1->h; ++j) { for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = ((j == k + 3 || j == t1->h - 1)) ? 1 : 0; vsc = ((j == k + 3 || j == t1->h - 1)) ? 1 : 0;
opj_t1_dec_refpass_step_mqc_vsc( opj_t1_dec_refpass_step_mqc_vsc(
t1, t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->flags[((j+1) * t1->flags_stride) + i + 1],
colflags2,
&t1->data[(j * t1->w) + i], &t1->data[(j * t1->w) + i],
poshalf, poshalf,
neghalf, neghalf,
vsc); vsc, j - k);
} }
} }
colflags1 += t1->flags_stride;
} }
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
@ -858,8 +981,10 @@ LABEL_PARTIAL:
static void opj_t1_dec_clnpass_step_partial( static void opj_t1_dec_clnpass_step_partial(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf) OPJ_INT32 oneplushalf,
OPJ_INT32 row)
{ {
OPJ_INT32 v, flag; OPJ_INT32 v, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */
@ -868,38 +993,48 @@ static void opj_t1_dec_clnpass_step_partial(
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row);
#ifdef CONSISTENCY_CHECK
*flagsp &= ~T1_VISIT; *flagsp &= ~T1_VISIT;
#endif
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
static void opj_t1_dec_clnpass_step( static void opj_t1_dec_clnpass_step(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf) OPJ_INT32 oneplushalf,
OPJ_INT32 row)
{ {
OPJ_INT32 v, flag; OPJ_INT32 v, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */
#ifdef CONSISTENCY_CHECK
flag = *flagsp; assert( (!(*flagsp & (T1_SIG | T1_VISIT))) == (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (4*row)))) );
if (!(flag & (T1_SIG | T1_VISIT))) { #endif
if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (4*row)))) {
flag = *flagsp;
opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag));
if (opj_mqc_decode(mqc)) { if (opj_mqc_decode(mqc)) {
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row);
} }
} }
#ifdef CONSISTENCY_CHECK
*flagsp &= ~T1_VISIT; *flagsp &= ~T1_VISIT;
#endif
} /* VSC and BYPASS by Antonin */ } /* VSC and BYPASS by Antonin */
static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf) OPJ_INT32 oneplushalf,
OPJ_INT32 row)
{ {
OPJ_INT32 v; OPJ_INT32 v;
OPJ_INT32 flag; OPJ_INT32 flag;
@ -914,7 +1049,7 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_updateflags(flagsp, v, t1->flags_stride); opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row);
} }
} }
/*flagsp &= ~T1_VISIT;*/ /*flagsp &= ~T1_VISIT;*/
@ -923,10 +1058,12 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(
static void opj_t1_dec_clnpass_step_vsc( static void opj_t1_dec_clnpass_step_vsc(
opj_t1_t *t1, opj_t1_t *t1,
opj_flag_t *flagsp, opj_flag_t *flagsp,
opj_colflag_t *colflagsp,
OPJ_INT32 *datap, OPJ_INT32 *datap,
OPJ_INT32 oneplushalf, OPJ_INT32 oneplushalf,
OPJ_INT32 partial, OPJ_INT32 partial,
OPJ_INT32 vsc) OPJ_INT32 vsc,
OPJ_INT32 row)
{ {
OPJ_INT32 v, flag; OPJ_INT32 v, flag;
@ -936,17 +1073,19 @@ static void opj_t1_dec_clnpass_step_vsc(
if (partial) { if (partial) {
goto LABEL_PARTIAL; goto LABEL_PARTIAL;
} }
if (!(flag & (T1_SIG | T1_VISIT))) { if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) {
opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag));
if (opj_mqc_decode(mqc)) { if (opj_mqc_decode(mqc)) {
LABEL_PARTIAL: LABEL_PARTIAL:
opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
*datap = v ? -oneplushalf : oneplushalf; *datap = v ? -oneplushalf : oneplushalf;
opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row);
} }
} }
#ifdef CONSISTENCY_CHECK
*flagsp &= ~T1_VISIT; *flagsp &= ~T1_VISIT;
#endif
} }
static void opj_t1_enc_clnpass( static void opj_t1_enc_clnpass(
@ -1015,7 +1154,7 @@ static void opj_t1_enc_clnpass(
#define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)] #define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)]
#define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \ #define opj_t1_dec_clnpass_internal(consistency_check, t1, bpno, cblksty, w, h, flags_stride) \
{ \ { \
OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \ OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \
OPJ_UINT32 i, j, k; \ OPJ_UINT32 i, j, k; \
@ -1027,14 +1166,17 @@ static void opj_t1_enc_clnpass(
half = one >> 1; \ half = one >> 1; \
oneplushalf = one | half; \ oneplushalf = one | half; \
if (cblksty & J2K_CCP_CBLKSTY_VSC) { \ if (cblksty & J2K_CCP_CBLKSTY_VSC) { \
opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
for (k = 0; k < h; k += 4) { \ for (k = 0; k < h; k += 4) { \
for (i = 0; i < w; ++i) { \ for (i = 0; i < w; ++i) { \
opj_colflag_t *colflags2 = colflags1 + i; \
if (k + 3 < h) { \ if (k + 3 < h) { \
agg = !(MACRO_t1_flags_internal(1 + k,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ agg = !((*colflags2 & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \
|| MACRO_t1_flags_internal(1 + k + 1,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \
|| MACRO_t1_flags_internal(1 + k + 2,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \
|| (MACRO_t1_flags_internal(1 + k + 3,1 + i,flags_stride) \ T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3)) || \
& (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); \ ((MACRO_t1_flags_internal(1 + k + 3,1 + i,flags_stride) \
& ((~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG_OTH)))); \
} else { \ } else { \
agg = 0; \ agg = 0; \
} \ } \
@ -1054,24 +1196,36 @@ static void opj_t1_enc_clnpass(
opj_t1_dec_clnpass_step_vsc( \ opj_t1_dec_clnpass_step_vsc( \
t1, \ t1, \
&t1->flags[((j+1) * flags_stride) + i + 1], \ &t1->flags[((j+1) * flags_stride) + i + 1], \
colflags2, \
&t1->data[(j * w) + i], \ &t1->data[(j * w) + i], \
oneplushalf, \ oneplushalf, \
agg && (j == k + (OPJ_UINT32)runlen), \ agg && (j == k + (OPJ_UINT32)runlen), \
vsc); \ vsc, j - k); \
} \ } \
*colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \
} \ } \
colflags1 += flags_stride; \
} \ } \
} else { \ } else { \
OPJ_INT32 *data1 = t1->data; \ OPJ_INT32 *data1 = t1->data; \
opj_flag_t *flags1 = &t1->flags[1]; \ opj_flag_t *flags1 = &t1->flags[1]; \
opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
for (k = 0; k < (h & ~3u); k += 4) { \ for (k = 0; k < (h & ~3u); k += 4) { \
for (i = 0; i < w; ++i) { \ for (i = 0; i < w; ++i) { \
OPJ_INT32 *data2 = data1 + i; \ OPJ_INT32 *data2 = data1 + i; \
opj_flag_t *flags2 = flags1 + i; \ opj_flag_t *flags2 = flags1 + i; \
agg = !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \ opj_colflag_t *colflags2 = colflags1 + i; \
MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \ opj_colflag_t colflags = *colflags2; \
MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \ agg = !(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \
MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); \ T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \
T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \
T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3 | T1_COLFLAG_SIG_OTHER_ROW_3)); \
if( consistency_check ) { \
assert( agg == !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \
MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \
MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \
MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)) ); \
} \
if (agg) { \ if (agg) { \
opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \
if (!opj_mqc_decode(mqc)) { \ if (!opj_mqc_decode(mqc)) { \
@ -1084,52 +1238,59 @@ static void opj_t1_enc_clnpass(
data2 += (OPJ_UINT32)runlen * w; \ data2 += (OPJ_UINT32)runlen * w; \
for (j = (OPJ_UINT32)runlen; j < 4 && j < h; ++j) { \ for (j = (OPJ_UINT32)runlen; j < 4 && j < h; ++j) { \
flags2 += flags_stride; \ flags2 += flags_stride; \
if (agg && (j == (OPJ_UINT32)runlen)) { \ if (j == (OPJ_UINT32)runlen) { \
opj_t1_dec_clnpass_step_partial(t1, flags2, data2, oneplushalf); \ opj_t1_dec_clnpass_step_partial(t1, flags2, colflags2, data2, oneplushalf, j); \
} else { \ } else { \
opj_t1_dec_clnpass_step(t1, flags2, data2, oneplushalf); \ opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j); \
} \ } \
data2 += w; \ data2 += w; \
} \ } \
} else { \ } else { \
opj_flag_t flag; \
flags2 += flags_stride; \ flags2 += flags_stride; \
flag = *flags2; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
if (!(flag & (T1_SIG | T1_VISIT))) \ if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0); \
*flags2 &= ~T1_VISIT; \ } \
if( consistency_check ) *flags2 &= ~T1_VISIT; \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
flag = *flags2; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
if (!(flag & (T1_SIG | T1_VISIT))) \ if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1); \
*flags2 &= ~T1_VISIT; \ } \
if( consistency_check ) *flags2 &= ~T1_VISIT; \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
flag = *flags2; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
if (!(flag & (T1_SIG | T1_VISIT))) \ if (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) {\
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2); \
*flags2 &= ~T1_VISIT; \ } \
if( consistency_check ) *flags2 &= ~T1_VISIT; \
data2 += w; \ data2 += w; \
flags2 += flags_stride; \ flags2 += flags_stride; \
flag = *flags2; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
if (!(flag & (T1_SIG | T1_VISIT))) \ if (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3); \
*flags2 &= ~T1_VISIT; \ } \
if( consistency_check ) *flags2 &= ~T1_VISIT; \
data2 += w; \ data2 += w; \
} \ } \
*colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \
} \ } \
data1 += w << 2; \ data1 += w << 2; \
flags1 += flags_stride << 2; \ flags1 += flags_stride << 2; \
colflags1 += flags_stride; \
} \ } \
for (i = 0; i < w; ++i) { \ for (i = 0; i < w; ++i) { \
OPJ_INT32 *data2 = data1 + i; \ OPJ_INT32 *data2 = data1 + i; \
opj_flag_t *flags2 = flags1 + i; \ opj_flag_t *flags2 = flags1 + i; \
opj_colflag_t *colflags2 = colflags1 + i; \
for (j = k; j < h; ++j) { \ for (j = k; j < h; ++j) { \
flags2 += flags_stride; \ flags2 += flags_stride; \
opj_t1_dec_clnpass_step(t1, flags2, data2, oneplushalf); \ opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j - k); \
data2 += w; \ data2 += w; \
} \ } \
*colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \
} \ } \
} \ } \
\ \
@ -1153,7 +1314,11 @@ static void opj_t1_dec_clnpass_64x64(
OPJ_INT32 bpno, OPJ_INT32 bpno,
OPJ_INT32 cblksty) OPJ_INT32 cblksty)
{ {
opj_t1_dec_clnpass_internal(t1, bpno, cblksty, 64, 64, 66); #ifdef CONSISTENCY_CHECK
opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, 64, 64, 66);
#else
opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, 64, 64, 66);
#endif
} }
static void opj_t1_dec_clnpass_generic( static void opj_t1_dec_clnpass_generic(
@ -1161,7 +1326,11 @@ static void opj_t1_dec_clnpass_generic(
OPJ_INT32 bpno, OPJ_INT32 bpno,
OPJ_INT32 cblksty) OPJ_INT32 cblksty)
{ {
opj_t1_dec_clnpass_internal(t1, bpno, cblksty, t1->w, t1->h, t1->flags_stride); #ifdef CONSISTENCY_CHECK
opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, t1->w, t1->h, t1->flags_stride);
#else
opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, t1->w, t1->h, t1->flags_stride);
#endif
} }
@ -1234,6 +1403,21 @@ static OPJ_BOOL opj_t1_allocate_buffers(
t1->flagssize=flagssize; t1->flagssize=flagssize;
} }
memset(t1->flags,0,flagssize * sizeof(opj_flag_t)); memset(t1->flags,0,flagssize * sizeof(opj_flag_t));
if (!t1->encoder) {
OPJ_UINT32 colflags_size=t1->flags_stride * ((h+3) / 4 + 2);
if(colflags_size > t1->colflags_size){
opj_aligned_free(t1->colflags);
t1->colflags = (opj_colflag_t*) opj_aligned_malloc(colflags_size * sizeof(opj_colflag_t));
if(!t1->colflags){
/* FIXME event manager error callback */
return OPJ_FALSE;
}
t1->colflags_size=colflags_size;
}
memset(t1->colflags,0,colflags_size * sizeof(opj_colflag_t));
}
t1->w=w; t1->w=w;
t1->h=h; t1->h=h;
@ -1304,6 +1488,10 @@ void opj_t1_destroy(opj_t1_t *p_t1)
p_t1->flags = 00; p_t1->flags = 00;
} }
if (p_t1->colflags) {
opj_aligned_free(p_t1->colflags);
p_t1->colflags = 00;
}
opj_free(p_t1); opj_free(p_t1);
} }

View File

@ -52,6 +52,7 @@ in T1.C are used by some function in TCD.C.
/* CAUTION: the value of those constants must not be changed, otherwise the */ /* CAUTION: the value of those constants must not be changed, otherwise the */
/* optimization of opj_t1_updateflags() will break! */ /* optimization of opj_t1_updateflags() will break! */
/* BEGINNING of flags that apply to opj_flag_t */
#define T1_SIG_NE 0x0001 /**< Context orientation : North-East direction */ #define T1_SIG_NE 0x0001 /**< Context orientation : North-East direction */
#define T1_SIG_SE 0x0002 /**< Context orientation : South-East direction */ #define T1_SIG_SE 0x0002 /**< Context orientation : South-East direction */
#define T1_SIG_SW 0x0004 /**< Context orientation : South-West direction */ #define T1_SIG_SW 0x0004 /**< Context orientation : South-West direction */
@ -69,9 +70,10 @@ in T1.C are used by some function in TCD.C.
#define T1_SGN_W 0x0800 #define T1_SGN_W 0x0800
#define T1_SGN (T1_SGN_N|T1_SGN_E|T1_SGN_S|T1_SGN_W) #define T1_SGN (T1_SGN_N|T1_SGN_E|T1_SGN_S|T1_SGN_W)
#define T1_SIG 0x1000 #define T1_SIG 0x1000 /**< No longer used by decoder */
#define T1_REFINE 0x2000 #define T1_REFINE 0x2000 /**< No longer used by decoder */
#define T1_VISIT 0x4000 #define T1_VISIT 0x4000 /**< No longer used by decoder */
/* END of flags that apply to opj_flag_t */
#define T1_NUMCTXS_ZC 9 #define T1_NUMCTXS_ZC 9
#define T1_NUMCTXS_SC 5 #define T1_NUMCTXS_SC 5
@ -91,10 +93,32 @@ in T1.C are used by some function in TCD.C.
#define T1_TYPE_MQ 0 /**< Normal coding using entropy coder */ #define T1_TYPE_MQ 0 /**< Normal coding using entropy coder */
#define T1_TYPE_RAW 1 /**< No encoding the information is store under raw format in codestream (mode switch RAW)*/ #define T1_TYPE_RAW 1 /**< No encoding the information is store under raw format in codestream (mode switch RAW)*/
/* Flags stored in an opj_colflag_t.
   Every row owns a group of T1_COLFLAG_RBS bits; the flag of row N is the
   corresponding ROW_0 flag shifted left by N * T1_COLFLAG_RBS. */
#define T1_COLFLAG_RBS 4 /* RBS = Row Bit Shift */

/* Neighbourhood significance, rows 0..3 */
#define T1_COLFLAG_SIG_OTHER_ROW_0 0x0001 /**< This sample has at least one significant neighbour */
#define T1_COLFLAG_SIG_OTHER_ROW_1 (T1_COLFLAG_SIG_OTHER_ROW_0 << (1 * T1_COLFLAG_RBS))
#define T1_COLFLAG_SIG_OTHER_ROW_2 (T1_COLFLAG_SIG_OTHER_ROW_0 << (2 * T1_COLFLAG_RBS))
#define T1_COLFLAG_SIG_OTHER_ROW_3 (T1_COLFLAG_SIG_OTHER_ROW_0 << (3 * T1_COLFLAG_RBS))

/* Sample significance, rows 0..3 */
#define T1_COLFLAG_SIG_ROW_0 0x0002 /**< This sample is significant */
#define T1_COLFLAG_SIG_ROW_1 (T1_COLFLAG_SIG_ROW_0 << (1 * T1_COLFLAG_RBS))
#define T1_COLFLAG_SIG_ROW_2 (T1_COLFLAG_SIG_ROW_0 << (2 * T1_COLFLAG_RBS))
#define T1_COLFLAG_SIG_ROW_3 (T1_COLFLAG_SIG_ROW_0 << (3 * T1_COLFLAG_RBS))

/* Visited state, rows 0..3 */
#define T1_COLFLAG_VISIT_ROW_0 0x0004 /**< This sample has been visited */
#define T1_COLFLAG_VISIT_ROW_1 (T1_COLFLAG_VISIT_ROW_0 << (1 * T1_COLFLAG_RBS))
#define T1_COLFLAG_VISIT_ROW_2 (T1_COLFLAG_VISIT_ROW_0 << (2 * T1_COLFLAG_RBS))
#define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3 * T1_COLFLAG_RBS))

/* Refined state, rows 0..3 */
#define T1_COLFLAG_REFINE_ROW_0 0x0008 /**< This sample has been refined */
#define T1_COLFLAG_REFINE_ROW_1 (T1_COLFLAG_REFINE_ROW_0 << (1 * T1_COLFLAG_RBS))
#define T1_COLFLAG_REFINE_ROW_2 (T1_COLFLAG_REFINE_ROW_0 << (2 * T1_COLFLAG_RBS))
#define T1_COLFLAG_REFINE_ROW_3 (T1_COLFLAG_REFINE_ROW_0 << (3 * T1_COLFLAG_RBS))
/* ----------------------------------------------------------------------- */ /* ----------------------------------------------------------------------- */
typedef OPJ_INT16 opj_flag_t; typedef OPJ_INT16 opj_flag_t;
/** Flags for 4 consecutive rows of a column */
typedef OPJ_UINT16 opj_colflag_t;
/** /**
Tier-1 coding (coding of code-block coefficients) Tier-1 coding (coding of code-block coefficients)
*/ */
@ -107,11 +131,17 @@ typedef struct opj_t1 {
OPJ_INT32 *data; OPJ_INT32 *data;
opj_flag_t *flags; opj_flag_t *flags;
/** Additional flag array such that colflags[1+0] is for state of col=0,row=0..3,
colflags[1+1] for col=1, row=0..3, colflags[1+flags_stride] for col=0,row=4..7, ...
This array avoids too much cache thrashing when processing by 4 vertical samples
as done in the various decoding steps. */
opj_colflag_t* colflags;
OPJ_UINT32 w; OPJ_UINT32 w;
OPJ_UINT32 h; OPJ_UINT32 h;
OPJ_UINT32 datasize; OPJ_UINT32 datasize;
OPJ_UINT32 flagssize; OPJ_UINT32 flagssize;
OPJ_UINT32 flags_stride; OPJ_UINT32 flags_stride;
OPJ_UINT32 colflags_size;
OPJ_UINT32 data_stride; OPJ_UINT32 data_stride;
OPJ_BOOL encoder; OPJ_BOOL encoder;
} opj_t1_t; } opj_t1_t;