T1 encoder: speed-up by aggressive inlining and more cache friendly data organization

~ 9% speed improvement seen on 10980x10980 uint16 image, T36JTT_20160914T074612_B02.tif
opj_compress time from 17.2s to 15.8s
This commit is contained in:
Even Rouault 2020-05-24 15:38:21 +02:00
parent 1e931fdb36
commit 1c5627ee74
No known key found for this signature in database
GPG Key ID: 33EBBFC47B3DD87D
5 changed files with 460 additions and 392 deletions

View File

@ -46,27 +46,6 @@
/** @name Local static functions */ /** @name Local static functions */
/*@{*/ /*@{*/
/**
Output a byte, doing bit-stuffing if necessary.
After a 0xff byte, the next byte must be smaller than 0x90.
@param mqc MQC handle
*/
static void opj_mqc_byteout(opj_mqc_t *mqc);
/**
Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000
@param mqc MQC handle
*/
static void opj_mqc_renorme(opj_mqc_t *mqc);
/**
Encode the most probable symbol
@param mqc MQC handle
*/
static void opj_mqc_codemps(opj_mqc_t *mqc);
/**
Encode the least probable symbol
@param mqc MQC handle
*/
static void opj_mqc_codelps(opj_mqc_t *mqc);
/** /**
Fill mqc->c with 1's for flushing Fill mqc->c with 1's for flushing
@param mqc MQC handle @param mqc MQC handle
@ -182,80 +161,6 @@ static const opj_mqc_state_t mqc_states[47 * 2] = {
========================================================== ==========================================================
*/ */
/*
 * Move on to the next output byte and fill it from the top bits of mqc->c.
 * When the byte just completed is 0xff, the next byte only receives 7 bits
 * (c >> 20, ct = 7); otherwise it receives 8 bits (c >> 19, ct = 8).
 * Bit 27 of mqc->c, when set, is first added into the byte just completed.
 */
static void opj_mqc_byteout(opj_mqc_t *mqc)
{
/* bp is initialized to start - 1 in opj_mqc_init_enc() */
/* but this is safe, see opj_tcd_code_block_enc_allocate_data() */
assert(mqc->bp >= mqc->start - 1);
if (*mqc->bp == 0xff) {
/* Byte just completed is 0xff: next byte carries 7 bits only */
mqc->bp++;
*mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
mqc->c &= 0xfffff;
mqc->ct = 7;
} else {
if ((mqc->c & 0x8000000) == 0) {
/* Bit 27 clear: plain 8-bit output */
mqc->bp++;
*mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
mqc->c &= 0x7ffff;
mqc->ct = 8;
} else {
/* Bit 27 set: add one to the byte just completed */
(*mqc->bp)++;
if (*mqc->bp == 0xff) {
/* The increment produced 0xff: drop bit 27 and emit 7 bits */
mqc->c &= 0x7ffffff;
mqc->bp++;
*mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
mqc->c &= 0xfffff;
mqc->ct = 7;
} else {
/* 8-bit output; the byte cast and the mask below discard bit 27 */
mqc->bp++;
*mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
mqc->c &= 0x7ffff;
mqc->ct = 8;
}
}
}
}
/*
 * Shift mqc->a and mqc->c left one bit at a time until bit 15 of mqc->a is
 * set. Each shift consumes one unit of mqc->ct; when the counter reaches
 * zero a byte is emitted through opj_mqc_byteout().
 */
static void opj_mqc_renorme(opj_mqc_t *mqc)
{
    for (;;) {
        mqc->a <<= 1;
        mqc->c <<= 1;
        if (--mqc->ct == 0) {
            opj_mqc_byteout(mqc);
        }
        if ((mqc->a & 0x8000) != 0) {
            break;
        }
    }
}
/*
 * Encode the most probable symbol of the current context.
 * The interval mqc->a is reduced by the context's qeval; renormalization and
 * the switch to the nmps state only happen when bit 15 of mqc->a is cleared.
 */
static void opj_mqc_codemps(opj_mqc_t *mqc)
{
    mqc->a -= (*mqc->curctx)->qeval;

    /* Fast path: interval still has bit 15 set, no renormalization needed */
    if ((mqc->a & 0x8000) != 0) {
        mqc->c += (*mqc->curctx)->qeval;
        return;
    }

    if (mqc->a >= (*mqc->curctx)->qeval) {
        mqc->c += (*mqc->curctx)->qeval;
    } else {
        mqc->a = (*mqc->curctx)->qeval;
    }
    *mqc->curctx = (*mqc->curctx)->nmps;
    opj_mqc_renorme(mqc);
}
/*
 * Encode the least probable symbol of the current context.
 * Always moves the context to its nlps state and renormalizes.
 */
static void opj_mqc_codelps(opj_mqc_t *mqc)
{
    mqc->a -= (*mqc->curctx)->qeval;

    if (mqc->a >= (*mqc->curctx)->qeval) {
        mqc->a = (*mqc->curctx)->qeval;
    } else {
        mqc->c += (*mqc->curctx)->qeval;
    }

    *mqc->curctx = (*mqc->curctx)->nlps;
    opj_mqc_renorme(mqc);
}
static void opj_mqc_setbits(opj_mqc_t *mqc) static void opj_mqc_setbits(opj_mqc_t *mqc)
{ {
OPJ_UINT32 tempc = mqc->c + mqc->a; OPJ_UINT32 tempc = mqc->c + mqc->a;
@ -303,14 +208,6 @@ void opj_mqc_init_enc(opj_mqc_t *mqc, OPJ_BYTE *bp)
mqc->end_of_byte_stream_counter = 0; mqc->end_of_byte_stream_counter = 0;
} }
/*
 * Encode symbol d with the current context: the MPS path is taken when d
 * equals the context's mps value, the LPS path otherwise.
 */
void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d)
{
    if ((*mqc->curctx)->mps != d) {
        opj_mqc_codelps(mqc);
    } else {
        opj_mqc_codemps(mqc);
    }
}
void opj_mqc_flush(opj_mqc_t *mqc) void opj_mqc_flush(opj_mqc_t *mqc)
{ {
@ -329,8 +226,6 @@ void opj_mqc_flush(opj_mqc_t *mqc)
} }
} }
#define BYPASS_CT_INIT 0xDEADBEEF
void opj_mqc_bypass_init_enc(opj_mqc_t *mqc) void opj_mqc_bypass_init_enc(opj_mqc_t *mqc)
{ {
/* This function is normally called after at least one opj_mqc_flush() */ /* This function is normally called after at least one opj_mqc_flush() */
@ -475,6 +370,43 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc)
} }
} }
/**
Renormalize mqc->a and mqc->c while encoding.
Applies opj_mqc_renorme_macro() directly to the fields of the MQC handle.
@param mqc MQC handle
*/
static INLINE void opj_mqc_renorme(opj_mqc_t *mqc)
{
opj_mqc_renorme_macro(mqc, mqc->a, mqc->c, mqc->ct);
}
/**
Encode the most probable symbol.
Applies opj_mqc_codemps_macro() directly to the fields of the MQC handle.
@param mqc MQC handle
*/
static INLINE void opj_mqc_codemps(opj_mqc_t *mqc)
{
opj_mqc_codemps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct);
}
/**
Encode the least probable symbol.
Applies opj_mqc_codelps_macro() directly to the fields of the MQC handle.
@param mqc MQC handle
*/
static INLINE void opj_mqc_codelps(opj_mqc_t *mqc)
{
opj_mqc_codelps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct);
}
/**
Encode a symbol using the MQ-coder.
The symbol is coded as the most probable symbol when it matches the mps
value of the current context, and as the least probable symbol otherwise.
@param mqc MQC handle
@param d The symbol to be encoded (0 or 1)
*/
static INLINE void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d)
{
    if ((*mqc->curctx)->mps == d) {
        opj_mqc_codemps(mqc);
        return;
    }
    opj_mqc_codelps(mqc);
}
void opj_mqc_segmark_enc(opj_mqc_t *mqc) void opj_mqc_segmark_enc(opj_mqc_t *mqc)
{ {
OPJ_UINT32 i; OPJ_UINT32 i;
@ -557,4 +489,36 @@ void opj_mqc_setstate(opj_mqc_t *mqc, OPJ_UINT32 ctxno, OPJ_UINT32 msb,
mqc->ctxs[ctxno] = &mqc_states[msb + (OPJ_UINT32)(prob << 1)]; mqc->ctxs[ctxno] = &mqc_states[msb + (OPJ_UINT32)(prob << 1)];
} }
/*
 * Output a byte, doing bit-stuffing if necessary.
 *
 * The decision is made in two steps: first determine whether the byte that
 * was just completed is 0xff (adding bit 27 of mqc->c into it beforehand
 * when that bit is set), then advance bp and store either 7 or 8 fresh bits.
 */
void opj_mqc_byteout(opj_mqc_t *mqc)
{
    int stuffed; /* non-zero when the byte just completed equals 0xff */

    /* bp is initialized to start - 1 in opj_mqc_init_enc() */
    /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */
    assert(mqc->bp >= mqc->start - 1);

    if (*mqc->bp == 0xff) {
        stuffed = 1;
    } else if ((mqc->c & 0x8000000) == 0) {
        stuffed = 0;
    } else {
        /* Bit 27 of c is set: add it into the byte just completed */
        (*mqc->bp)++;
        stuffed = (*mqc->bp == 0xff);
        if (stuffed) {
            mqc->c &= 0x7ffffff;
        }
    }

    mqc->bp++;
    if (stuffed) {
        /* After a 0xff byte only 7 bits are emitted */
        *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
        mqc->c &= 0xfffff;
        mqc->ct = 7;
    } else {
        *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
        mqc->c &= 0x7ffff;
        mqc->ct = 8;
    }
}

View File

@ -96,6 +96,8 @@ typedef struct opj_mqc {
OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA]; OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA];
} opj_mqc_t; } opj_mqc_t;
#define BYPASS_CT_INIT 0xDEADBEEF
#include "mqc_inl.h" #include "mqc_inl.h"
/** @name Exported functions */ /** @name Exported functions */
@ -135,12 +137,7 @@ Set the current context used for coding/decoding
@param ctxno Number that identifies the context @param ctxno Number that identifies the context
*/ */
#define opj_mqc_setcurctx(mqc, ctxno) (mqc)->curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] #define opj_mqc_setcurctx(mqc, ctxno) (mqc)->curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
/**
Encode a symbol using the MQ-coder
@param mqc MQC handle
@param d The symbol to be encoded (0 or 1)
*/
void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d);
/** /**
Flush the encoder, so that all remaining data is written Flush the encoder, so that all remaining data is written
@param mqc MQC handle @param mqc MQC handle

View File

@ -156,13 +156,13 @@ static INLINE OPJ_UINT32 opj_mqc_raw_decode(opj_mqc_t *mqc)
} \ } \
} }
#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ #define DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \
register const opj_mqc_state_t **curctx = mqc->curctx; \ register const opj_mqc_state_t **curctx = mqc->curctx; \
register OPJ_UINT32 c = mqc->c; \ register OPJ_UINT32 c = mqc->c; \
register OPJ_UINT32 a = mqc->a; \ register OPJ_UINT32 a = mqc->a; \
register OPJ_UINT32 ct = mqc->ct register OPJ_UINT32 ct = mqc->ct
#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ #define UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \
mqc->curctx = curctx; \ mqc->curctx = curctx; \
mqc->c = c; \ mqc->c = c; \
mqc->a = a; \ mqc->a = a; \
@ -193,4 +193,90 @@ Decode a symbol
#define opj_mqc_decode(d, mqc) \ #define opj_mqc_decode(d, mqc) \
opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct) opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct)
/**
Output a byte, doing bit-stuffing if necessary.
After a 0xff byte, the next byte must be smaller than 0x90.
@param mqc MQC handle
*/
void opj_mqc_byteout(opj_mqc_t *mqc);
/**
Renormalize the interval and code registers while encoding, so that the
interval register stays between 0x8000 and 0x10000.
The macro works on caller-supplied lvalues (typically local copies of the
coder registers). mqc->c is written back before opj_mqc_byteout() is called,
and c_ / ct_ are reloaded from the handle afterwards.
@param mqc MQC handle
@param a_ lvalue holding the value of mqc->a
@param c_ lvalue holding the value of mqc->c
@param ct_ lvalue holding the value of mqc->ct
*/
#define opj_mqc_renorme_macro(mqc, a_, c_, ct_) \
{ \
    do { \
        (a_) <<= 1; \
        (c_) <<= 1; \
        (ct_)--; \
        if ((ct_) == 0) { \
            /* opj_mqc_byteout() operates on the fields of the handle */ \
            (mqc)->c = (c_); \
            opj_mqc_byteout(mqc); \
            (c_) = (mqc)->c; \
            (ct_) = (mqc)->ct; \
        } \
    } while (((a_) & 0x8000) == 0); \
}
/**
Encode the most probable symbol, working on caller-supplied lvalues.
@param mqc MQC handle
@param curctx pointer to the current context slot (same role as mqc->curctx)
@param a lvalue holding the value of mqc->a
@param c lvalue holding the value of mqc->c
@param ct lvalue holding the value of mqc->ct
*/
#define opj_mqc_codemps_macro(mqc, curctx, a, c, ct) \
{ \
    (a) -= (*(curctx))->qeval; \
    if (((a) & 0x8000) == 0) { \
        if ((a) < (*(curctx))->qeval) { \
            (a) = (*(curctx))->qeval; \
        } else { \
            (c) += (*(curctx))->qeval; \
        } \
        /* Renormalization is needed: move the context to its nmps state */ \
        *(curctx) = (*(curctx))->nmps; \
        opj_mqc_renorme_macro(mqc, a, c, ct); \
    } else { \
        (c) += (*(curctx))->qeval; \
    } \
}
/**
Encode the least probable symbol, working on caller-supplied lvalues.
The context is always moved to its nlps state and the registers are
renormalized.
@param mqc MQC handle
@param curctx pointer to the current context slot (same role as mqc->curctx)
@param a lvalue holding the value of mqc->a
@param c lvalue holding the value of mqc->c
@param ct lvalue holding the value of mqc->ct
*/
#define opj_mqc_codelps_macro(mqc, curctx, a, c, ct) \
{ \
    (a) -= (*(curctx))->qeval; \
    if ((a) < (*(curctx))->qeval) { \
        (c) += (*(curctx))->qeval; \
    } else { \
        (a) = (*(curctx))->qeval; \
    } \
    *(curctx) = (*(curctx))->nlps; \
    opj_mqc_renorme_macro(mqc, a, c, ct); \
}
/**
Encode a symbol using the MQ-coder, working on caller-supplied lvalues.
The symbol is coded through opj_mqc_codemps_macro() when it equals the mps
value of the current context, and through opj_mqc_codelps_macro() otherwise.
@param mqc MQC handle
@param curctx pointer to the current context slot (same role as mqc->curctx)
@param a lvalue holding the value of mqc->a
@param c lvalue holding the value of mqc->c
@param ct lvalue holding the value of mqc->ct
@param d The symbol to be encoded (0 or 1)
*/
#define opj_mqc_encode_macro(mqc, curctx, a, c, ct, d) \
{ \
    if ((*(curctx))->mps == (d)) { \
        opj_mqc_codemps_macro(mqc, curctx, a, c, ct); \
    } else { \
        opj_mqc_codelps_macro(mqc, curctx, a, c, ct); \
    } \
}
/**
Append one raw (bypass) bit to the byte being assembled in c.
ct counts the bits still free in the current byte; a value of BYPASS_CT_INIT
is replaced by 8 before the bit is added. When the byte is complete it is
stored at *mqc->bp, bp is advanced and c is cleared.
@param mqc MQC handle
@param c lvalue holding the value of mqc->c
@param ct lvalue holding the value of mqc->ct
@param d The bit to be encoded (0 or 1)
*/
#define opj_mqc_bypass_enc_macro(mqc, c, ct, d) \
{\
    if ((ct) == BYPASS_CT_INIT) {\
        (ct) = 8;\
    }\
    (ct)--;\
    (c) = (c) + ((d) << (ct));\
    if ((ct) == 0) {\
        *(mqc)->bp = (OPJ_BYTE)(c);\
        (ct) = 8;\
        /* If the previous byte was 0xff, make sure that the next msb is 0 */ \
        if (*(mqc)->bp == 0xff) {\
            (ct) = 7;\
        }\
        (mqc)->bp++;\
        (c) = 0;\
    }\
}
#endif /* OPJ_MQC_INL_H */ #endif /* OPJ_MQC_INL_H */

View File

@ -61,6 +61,13 @@
#define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] #define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
/* Macros to deal with signed integer with just MSB bit set for
 * negative values (smr = signed magnitude representation).
 *
 * opj_smr_abs(x)  : magnitude, i.e. the low 31 bits of x
 * opj_smr_sign(x) : sign bit, 1 when bit 31 of x is set, 0 otherwise
 * opj_to_smr(x)   : convert a two's complement value to smr. The negation is
 *                   done on the unsigned type so that it is well defined for
 *                   every input, and x is parenthesised so that expression
 *                   arguments such as (a - b) are negated as a whole. */
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : (((OPJ_UINT32)0 - (OPJ_UINT32)(x)) | 0x80000000U))
/** @name Local static functions */ /** @name Local static functions */
/*@{*/ /*@{*/
@ -329,61 +336,53 @@ static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
/** /**
Encode significant pass Encode significant pass
*/ */
static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, #define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
opj_flag_t *flagsp, { \
OPJ_INT32 *datap, OPJ_UINT32 v; \
OPJ_INT32 bpno, const OPJ_UINT32 ci = (ciIn); \
OPJ_INT32 one, const OPJ_UINT32 vsc = (vscIn); \
OPJ_INT32 *nmsedec, const OPJ_INT32* l_datap = (datapIn); \
OPJ_BYTE type, opj_flag_t* flagsp = (flagspIn); \
OPJ_UINT32 ci, OPJ_UINT32 const flags = *flagsp; \
OPJ_UINT32 vsc) if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
{ (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
OPJ_UINT32 v; OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ /* #ifdef DEBUG_ENC_SIG */ \
/* fprintf(stderr, " ctxt1=%d\n", ctxt1); */ \
OPJ_UINT32 const flags = *flagsp; /* #endif */ \
opj_t1_setcurctx(curctx, ctxt1); \
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
(flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); } else { \
v = (opj_int_abs(*datap) & one) ? 1 : 0; opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
#ifdef DEBUG_ENC_SIG } \
fprintf(stderr, " ctxt1=%d\n", ctxt1); if (v) { \
#endif OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
opj_mqc_setcurctx(mqc, ctxt1); *flagsp, \
if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ flagsp[-1], flagsp[1], \
opj_mqc_bypass_enc(mqc, v); ci); \
} else { OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
opj_mqc_encode(mqc, v); v = opj_smr_sign(*l_datap); \
} *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
if (v) { (OPJ_UINT32)bpno); \
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( /* #ifdef DEBUG_ENC_SIG */ \
*flagsp, /* fprintf(stderr, " ctxt2=%d\n", ctxt2); */ \
flagsp[-1], flagsp[1], /* #endif */ \
ci); opj_t1_setcurctx(curctx, ctxt2); \
OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
v = *datap < 0 ? 1U : 0U; opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
*nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), } else { \
(OPJ_UINT32)bpno); OPJ_UINT32 spb = opj_t1_getspb(lu); \
#ifdef DEBUG_ENC_SIG /* #ifdef DEBUG_ENC_SIG */ \
fprintf(stderr, " ctxt2=%d\n", ctxt2); /* fprintf(stderr, " spb=%d\n", spb); */ \
#endif /* #endif */ \
opj_mqc_setcurctx(mqc, ctxt2); opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ } \
opj_mqc_bypass_enc(mqc, v); opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
} else { } \
OPJ_UINT32 spb = opj_t1_getspb(lu); *flagsp |= T1_PI_THIS << (ci * 3U); \
#ifdef DEBUG_ENC_SIG } \
fprintf(stderr, " spb=%d\n", spb);
#endif
opj_mqc_encode(mqc, v ^ spb);
}
opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
}
*flagsp |= T1_PI_THIS << (ci * 3U);
}
} }
static INLINE void opj_t1_dec_sigpass_step_raw( static INLINE void opj_t1_dec_sigpass_step_raw(
@ -464,63 +463,64 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
opj_flag_t* f = &T1_FLAGS(0, 0); opj_flag_t* f = &T1_FLAGS(0, 0);
OPJ_UINT32 const extra = 2; OPJ_UINT32 const extra = 2;
opj_mqc_t* mqc = &(t1->mqc);
DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
const OPJ_INT32* datap = t1->data;
*nmsedec = 0; *nmsedec = 0;
#ifdef DEBUG_ENC_SIG #ifdef DEBUG_ENC_SIG
fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno); fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
#endif #endif
for (k = 0; k < (t1->h & ~3U); k += 4) { for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
const OPJ_UINT32 w = t1->w;
#ifdef DEBUG_ENC_SIG #ifdef DEBUG_ENC_SIG
fprintf(stderr, " k=%d\n", k); fprintf(stderr, " k=%d\n", k);
#endif #endif
for (i = 0; i < t1->w; ++i) { for (i = 0; i < w; ++i, ++f, datap += 4) {
#ifdef DEBUG_ENC_SIG #ifdef DEBUG_ENC_SIG
fprintf(stderr, " i=%d\n", i); fprintf(stderr, " i=%d\n", i);
#endif #endif
if (*f == 0U) { if (*f == 0U) {
/* Nothing to do for any of the 4 data points */ /* Nothing to do for any of the 4 data points */
f++;
continue; continue;
} }
opj_t1_enc_sigpass_step( opj_t1_enc_sigpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, f,
&t1->data[((k + 0) * t1->data_stride) + i], &datap[0],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
0, cblksty & J2K_CCP_CBLKSTY_VSC); 0, cblksty & J2K_CCP_CBLKSTY_VSC);
opj_t1_enc_sigpass_step( opj_t1_enc_sigpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, f,
&t1->data[((k + 1) * t1->data_stride) + i], &datap[1],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
1, 0); 1, 0);
opj_t1_enc_sigpass_step( opj_t1_enc_sigpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, f,
&t1->data[((k + 2) * t1->data_stride) + i], &datap[2],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
2, 0); 2, 0);
opj_t1_enc_sigpass_step( opj_t1_enc_sigpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, f,
&t1->data[((k + 3) * t1->data_stride) + i], &datap[3],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
3, 0); 3, 0);
++f;
} }
f += extra;
} }
if (k < t1->h) { if (k < t1->h) {
@ -528,20 +528,20 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
#ifdef DEBUG_ENC_SIG #ifdef DEBUG_ENC_SIG
fprintf(stderr, " k=%d\n", k); fprintf(stderr, " k=%d\n", k);
#endif #endif
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i, ++f) {
#ifdef DEBUG_ENC_SIG #ifdef DEBUG_ENC_SIG
fprintf(stderr, " i=%d\n", i); fprintf(stderr, " i=%d\n", i);
#endif #endif
if (*f == 0U) { if (*f == 0U) {
/* Nothing to do for any of the 4 data points */ /* Nothing to do for any of the 4 data points */
f++; datap += (t1->h - k);
continue; continue;
} }
for (j = k; j < t1->h; ++j) { for (j = k; j < t1->h; ++j, ++datap) {
opj_t1_enc_sigpass_step( opj_t1_enc_sigpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, f,
&t1->data[(j * t1->data_stride) + i], &datap[0],
bpno, bpno,
one, one,
nmsedec, nmsedec,
@ -549,9 +549,10 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
j - k, j - k,
(j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
} }
++f;
} }
} }
UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
} }
static void opj_t1_dec_sigpass_raw( static void opj_t1_dec_sigpass_raw(
@ -626,7 +627,7 @@ static void opj_t1_dec_sigpass_raw(
register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \ register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
const OPJ_UINT32 l_w = w; \ const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \ opj_mqc_t* mqc = &(t1->mqc); \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
register OPJ_UINT32 v; \ register OPJ_UINT32 v; \
one = 1 << bpno; \ one = 1 << bpno; \
half = one >> 1; \ half = one >> 1; \
@ -651,7 +652,7 @@ static void opj_t1_dec_sigpass_raw(
} \ } \
} \ } \
} \ } \
UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
if( k < h ) { \ if( k < h ) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
for (j = 0; j < h - k; ++j) { \ for (j = 0; j < h - k; ++j) { \
@ -715,38 +716,27 @@ static void opj_t1_dec_sigpass_mqc(
/** /**
Encode refinement pass step Encode refinement pass step
*/ */
static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, #define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
opj_flag_t *flagsp, {\
OPJ_INT32 *datap, OPJ_UINT32 v; \
OPJ_INT32 bpno, if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
OPJ_INT32 one, const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
OPJ_INT32 *nmsedec, OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
OPJ_BYTE type, OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
OPJ_UINT32 ci) *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
{ (OPJ_UINT32)bpno); \
OPJ_UINT32 v; v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \
/* #ifdef DEBUG_ENC_REF */ \
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ /* fprintf(stderr, " ctxt=%d\n", ctxt); */ \
/* #endif */ \
OPJ_UINT32 const shift_flags = opj_t1_setcurctx(curctx, ctxt); \
(*flagsp >> (ci * 3U)); if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { } else { \
OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
*nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), } \
(OPJ_UINT32)bpno); flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
v = (opj_int_abs(*datap) & one) ? 1 : 0; } \
#ifdef DEBUG_ENC_REF
fprintf(stderr, " ctxt=%d\n", ctxt);
#endif
opj_mqc_setcurctx(mqc, ctxt);
if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */
opj_mqc_bypass_enc(mqc, v);
} else {
opj_mqc_encode(mqc, v);
}
*flagsp |= T1_MU_THIS << (ci * 3U);
}
} }
@ -807,100 +797,104 @@ static void opj_t1_enc_refpass(
const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
opj_flag_t* f = &T1_FLAGS(0, 0); opj_flag_t* f = &T1_FLAGS(0, 0);
const OPJ_UINT32 extra = 2U; const OPJ_UINT32 extra = 2U;
opj_mqc_t* mqc = &(t1->mqc);
DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
const OPJ_INT32* datap = t1->data;
*nmsedec = 0; *nmsedec = 0;
#ifdef DEBUG_ENC_REF #ifdef DEBUG_ENC_REF
fprintf(stderr, "enc_refpass: bpno=%d\n", bpno); fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
#endif #endif
for (k = 0; k < (t1->h & ~3U); k += 4) { for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
#ifdef DEBUG_ENC_REF #ifdef DEBUG_ENC_REF
fprintf(stderr, " k=%d\n", k); fprintf(stderr, " k=%d\n", k);
#endif #endif
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i, f++, datap += 4) {
const OPJ_UINT32 flags = *f;
OPJ_UINT32 flagsUpdated = flags;
#ifdef DEBUG_ENC_REF #ifdef DEBUG_ENC_REF
fprintf(stderr, " i=%d\n", i); fprintf(stderr, " i=%d\n", i);
#endif #endif
if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
/* none significant */ /* none significant */
f++;
continue; continue;
} }
if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) { (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
/* all processed by sigpass */ /* all processed by sigpass */
f++;
continue; continue;
} }
opj_t1_enc_refpass_step( opj_t1_enc_refpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, flags, flagsUpdated,
&t1->data[((k + 0) * t1->data_stride) + i], &datap[0],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
0); 0);
opj_t1_enc_refpass_step( opj_t1_enc_refpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, flags, flagsUpdated,
&t1->data[((k + 1) * t1->data_stride) + i], &datap[1],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
1); 1);
opj_t1_enc_refpass_step( opj_t1_enc_refpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, flags, flagsUpdated,
&t1->data[((k + 2) * t1->data_stride) + i], &datap[2],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
2); 2);
opj_t1_enc_refpass_step( opj_t1_enc_refpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, flags, flagsUpdated,
&t1->data[((k + 3) * t1->data_stride) + i], &datap[3],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
3); 3);
++f; *f = flagsUpdated;
} }
f += extra;
} }
if (k < t1->h) { if (k < t1->h) {
OPJ_UINT32 j; OPJ_UINT32 j;
const OPJ_UINT32 remaining_lines = t1->h - k;
#ifdef DEBUG_ENC_REF #ifdef DEBUG_ENC_REF
fprintf(stderr, " k=%d\n", k); fprintf(stderr, " k=%d\n", k);
#endif #endif
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i, ++f) {
#ifdef DEBUG_ENC_REF #ifdef DEBUG_ENC_REF
fprintf(stderr, " i=%d\n", i); fprintf(stderr, " i=%d\n", i);
#endif #endif
if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
/* none significant */ /* none significant */
f++; datap += remaining_lines;
continue; continue;
} }
for (j = k; j < t1->h; ++j) { for (j = 0; j < remaining_lines; ++j, datap ++) {
opj_t1_enc_refpass_step( opj_t1_enc_refpass_step_macro(
t1, mqc, curctx, a, c, ct,
f, *f, *f,
&t1->data[(j * t1->data_stride) + i], &datap[0],
bpno, bpno,
one, one,
nmsedec, nmsedec,
type, type,
j - k); j);
} }
++f;
} }
} }
UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
} }
@ -968,7 +962,7 @@ static void opj_t1_dec_refpass_raw(
register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
const OPJ_UINT32 l_w = w; \ const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \ opj_mqc_t* mqc = &(t1->mqc); \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
register OPJ_UINT32 v; \ register OPJ_UINT32 v; \
one = 1 << bpno; \ one = 1 << bpno; \
poshalf = one >> 1; \ poshalf = one >> 1; \
@ -992,7 +986,7 @@ static void opj_t1_dec_refpass_raw(
} \ } \
} \ } \
} \ } \
UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
if( k < h ) { \ if( k < h ) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
for (j = 0; j < h - k; ++j) { \ for (j = 0; j < h - k; ++j) { \
@ -1030,86 +1024,71 @@ static void opj_t1_dec_refpass_mqc(
/** /**
Encode clean-up pass step Encode clean-up pass step
*/ */
static void opj_t1_enc_clnpass_step( #define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
opj_t1_t *t1, { \
opj_flag_t *flagsp, OPJ_UINT32 v; \
OPJ_INT32 *datap, OPJ_UINT32 ci; \
OPJ_INT32 bpno, opj_flag_t* const flagsp = (flagspIn); \
OPJ_INT32 one, const OPJ_INT32* l_datap = (datapIn); \
OPJ_INT32 *nmsedec, const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
OPJ_UINT32 agg, T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
OPJ_UINT32 runlen, \
OPJ_UINT32 lim, if ((*flagsp & check) == check) { \
OPJ_UINT32 cblksty) if (runlen == 0) { \
{ *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
OPJ_UINT32 v; } else if (runlen == 1) { \
OPJ_UINT32 ci; *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ } else if (runlen == 2) { \
*flagsp &= ~(T1_PI_2 | T1_PI_3); \
const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | } else if (runlen == 3) { \
T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); *flagsp &= ~(T1_PI_3); \
} \
if ((*flagsp & check) == check) { } \
if (runlen == 0) { else \
*flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); for (ci = runlen; ci < lim; ++ci) { \
} else if (runlen == 1) { OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
*flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); if ((agg != 0) && (ci == runlen)) { \
} else if (runlen == 2) { goto_PARTIAL = OPJ_TRUE; \
*flagsp &= ~(T1_PI_2 | T1_PI_3); } \
} else if (runlen == 3) { else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
*flagsp &= ~(T1_PI_3); OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
} /* #ifdef DEBUG_ENC_CLN */ \
return; /* printf(" ctxt1=%d\n", ctxt1); */ \
} /* #endif */ \
opj_t1_setcurctx(curctx, ctxt1); \
for (ci = runlen; ci < lim; ++ci) { v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
OPJ_UINT32 vsc; opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
opj_flag_t flags; if (v) { \
OPJ_UINT32 ctxt1; goto_PARTIAL = OPJ_TRUE; \
} \
flags = *flagsp; } \
if( goto_PARTIAL ) { \
if ((agg != 0) && (ci == runlen)) { OPJ_UINT32 vsc; \
goto LABEL_PARTIAL; OPJ_UINT32 ctxt2, spb; \
} OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
*flagsp, \
if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { flagsp[-1], flagsp[1], \
ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); ci); \
#ifdef DEBUG_ENC_CLN *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
printf(" ctxt1=%d\n", ctxt1); (OPJ_UINT32)bpno); \
#endif ctxt2 = opj_t1_getctxno_sc(lu); \
opj_mqc_setcurctx(mqc, ctxt1); /* #ifdef DEBUG_ENC_CLN */ \
v = (opj_int_abs(*datap) & one) ? 1 : 0; /* printf(" ctxt2=%d\n", ctxt2); */ \
opj_mqc_encode(mqc, v); /* #endif */ \
if (v) { opj_t1_setcurctx(curctx, ctxt2); \
OPJ_UINT32 ctxt2, spb; \
OPJ_UINT32 lu; v = opj_smr_sign(*l_datap); \
LABEL_PARTIAL: spb = opj_t1_getspb(lu); \
lu = opj_t1_getctxtno_sc_or_spb_index( /* #ifdef DEBUG_ENC_CLN */ \
*flagsp, /* printf(" spb=%d\n", spb); */\
flagsp[-1], flagsp[1], /* #endif */ \
ci); opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
*nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
(OPJ_UINT32)bpno); opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
ctxt2 = opj_t1_getctxno_sc(lu); } \
#ifdef DEBUG_ENC_CLN *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
printf(" ctxt2=%d\n", ctxt2); l_datap ++; \
#endif } \
opj_mqc_setcurctx(mqc, ctxt2);
v = *datap < 0 ? 1U : 0U;
spb = opj_t1_getspb(lu);
#ifdef DEBUG_ENC_CLN
printf(" spb=%d\n", spb);
#endif
opj_mqc_encode(mqc, v ^ spb);
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
}
}
*flagsp &= ~(T1_PI_THIS << (3U * ci));
datap += t1->data_stride;
}
} }
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \ #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
@ -1165,47 +1144,50 @@ static void opj_t1_enc_clnpass(
{ {
OPJ_UINT32 i, k; OPJ_UINT32 i, k;
const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
OPJ_UINT32 agg, runlen; opj_mqc_t* mqc = &(t1->mqc);
DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ const OPJ_INT32* datap = t1->data;
opj_flag_t *f = &T1_FLAGS(0, 0);
const OPJ_UINT32 extra = 2U;
*nmsedec = 0; *nmsedec = 0;
#ifdef DEBUG_ENC_CLN #ifdef DEBUG_ENC_CLN
printf("enc_clnpass: bpno=%d\n", bpno); printf("enc_clnpass: bpno=%d\n", bpno);
#endif #endif
for (k = 0; k < (t1->h & ~3U); k += 4) { for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
#ifdef DEBUG_ENC_CLN #ifdef DEBUG_ENC_CLN
printf(" k=%d\n", k); printf(" k=%d\n", k);
#endif #endif
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i, f++) {
OPJ_UINT32 agg, runlen;
#ifdef DEBUG_ENC_CLN #ifdef DEBUG_ENC_CLN
printf(" i=%d\n", i); printf(" i=%d\n", i);
#endif #endif
agg = !(T1_FLAGS(i, k)); agg = !*f;
#ifdef DEBUG_ENC_CLN #ifdef DEBUG_ENC_CLN
printf(" agg=%d\n", agg); printf(" agg=%d\n", agg);
#endif #endif
if (agg) { if (agg) {
for (runlen = 0; runlen < 4; ++runlen) { for (runlen = 0; runlen < 4; ++runlen, ++datap) {
if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) { if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
break; break;
} }
} }
opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
opj_mqc_encode(mqc, runlen != 4); opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
if (runlen == 4) { if (runlen == 4) {
continue; continue;
} }
opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
opj_mqc_encode(mqc, runlen >> 1); opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
opj_mqc_encode(mqc, runlen & 1); opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
} else { } else {
runlen = 0; runlen = 0;
} }
opj_t1_enc_clnpass_step( opj_t1_enc_clnpass_step_macro(
t1, mqc, curctx, a, c, ct,
&T1_FLAGS(i, k), f,
&t1->data[((k + runlen) * t1->data_stride) + i], datap,
bpno, bpno,
one, one,
nmsedec, nmsedec,
@ -1213,23 +1195,24 @@ static void opj_t1_enc_clnpass(
runlen, runlen,
4U, 4U,
cblksty); cblksty);
datap += 4 - runlen;
} }
} }
if (k < t1->h) { if (k < t1->h) {
agg = 0; const OPJ_UINT32 agg = 0;
runlen = 0; const OPJ_UINT32 runlen = 0;
#ifdef DEBUG_ENC_CLN #ifdef DEBUG_ENC_CLN
printf(" k=%d\n", k); printf(" k=%d\n", k);
#endif #endif
for (i = 0; i < t1->w; ++i) { for (i = 0; i < t1->w; ++i, f++) {
#ifdef DEBUG_ENC_CLN #ifdef DEBUG_ENC_CLN
printf(" i=%d\n", i); printf(" i=%d\n", i);
printf(" agg=%d\n", agg); printf(" agg=%d\n", agg);
#endif #endif
opj_t1_enc_clnpass_step( opj_t1_enc_clnpass_step_macro(
t1, mqc, curctx, a, c, ct,
&T1_FLAGS(i, k), f,
&t1->data[((k + runlen) * t1->data_stride) + i], datap,
bpno, bpno,
one, one,
nmsedec, nmsedec,
@ -1237,8 +1220,11 @@ static void opj_t1_enc_clnpass(
runlen, runlen,
t1->h - k, t1->h - k,
cblksty); cblksty);
datap += t1->h - k;
} }
} }
UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
} }
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \ #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
@ -1250,7 +1236,7 @@ static void opj_t1_enc_clnpass(
opj_mqc_t* mqc = &(t1->mqc); \ opj_mqc_t* mqc = &(t1->mqc); \
register OPJ_INT32 *data = t1->data; \ register OPJ_INT32 *data = t1->data; \
register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
register OPJ_UINT32 v; \ register OPJ_UINT32 v; \
one = 1 << bpno; \ one = 1 << bpno; \
half = one >> 1; \ half = one >> 1; \
@ -1319,7 +1305,7 @@ static void opj_t1_enc_clnpass(
*flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
} \ } \
} \ } \
UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
if( k < h ) { \ if( k < h ) { \
for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \ for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
for (j = 0; j < h - k; ++j) { \ for (j = 0; j < h - k; ++j) { \
@ -1427,7 +1413,7 @@ static OPJ_FLOAT64 opj_t1_getwmsedec(
w2 = opj_dwt_getnorm(level, orient); w2 = opj_dwt_getnorm(level, orient);
} else { /* if (qmfbid == 0) */ } else { /* if (qmfbid == 0) */
const OPJ_INT32 log2_gain = (orient == 0) ? 0 : const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
(orient == 3) ? 2 : 1; (orient == 3) ? 2 : 1;
w2 = opj_dwt_getnorm_real(level, orient); w2 = opj_dwt_getnorm_real(level, orient);
/* Not sure this is right. But preserves past behaviour */ /* Not sure this is right. But preserves past behaviour */
stepsize /= (1 << log2_gain); stepsize /= (1 << log2_gain);
@ -1454,7 +1440,7 @@ static OPJ_BOOL opj_t1_allocate_buffers(
assert(w * h <= 4096); assert(w * h <= 4096);
/* encoder uses tile buffer, so no need to allocate */ /* encoder uses tile buffer, so no need to allocate */
if (!t1->encoder) { {
OPJ_UINT32 datasize = w * h; OPJ_UINT32 datasize = w * h;
if (datasize > t1->datasize) { if (datasize > t1->datasize) {
@ -1564,8 +1550,7 @@ void opj_t1_destroy(opj_t1_t *p_t1)
return; return;
} }
/* encoder uses tile buffer, so no need to free */ if (p_t1->data) {
if (!p_t1->encoder && p_t1->data) {
opj_aligned_free(p_t1->data); opj_aligned_free(p_t1->data);
p_t1->data = 00; p_t1->data = 00;
} }
@ -2140,8 +2125,7 @@ static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
OPJ_INT32* OPJ_RESTRICT tiledp; OPJ_INT32* OPJ_RESTRICT tiledp;
OPJ_UINT32 cblk_w; OPJ_UINT32 cblk_w;
OPJ_UINT32 cblk_h; OPJ_UINT32 cblk_h;
OPJ_UINT32 i, j, tileLineAdvance; OPJ_UINT32 i, j;
OPJ_SIZE_T tileIndex = 0;
OPJ_INT32 x = cblk->x0 - band->x0; OPJ_INT32 x = cblk->x0 - band->x0;
OPJ_INT32 y = cblk->y0 - band->y0; OPJ_INT32 y = cblk->y0 - band->y0;
@ -2177,11 +2161,9 @@ static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
cblk_w = t1->w; cblk_w = t1->w;
cblk_h = t1->h; cblk_h = t1->h;
tileLineAdvance = tile_w - cblk_w;
tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
t1->data = tiledp;
t1->data_stride = tile_w;
if (tccp->qmfbid == 1) { if (tccp->qmfbid == 1) {
/* Do multiplication on unsigned type, even if the /* Do multiplication on unsigned type, even if the
* underlying type is signed, to avoid potential * underlying type is signed, to avoid potential
@ -2192,22 +2174,52 @@ static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
* Fixes https://github.com/uclouvain/openjpeg/issues/1053 * Fixes https://github.com/uclouvain/openjpeg/issues/1053
*/ */
OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
for (j = 0; j < cblk_h; ++j) { OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
/* Change from "natural" order to "zigzag" order of T1 passes */
for (j = 0; j < (cblk_h & ~3U); j += 4) {
for (i = 0; i < cblk_w; ++i) { for (i = 0; i < cblk_w; ++i) {
tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS; t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
tileIndex++; t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
t1data += 4;
}
}
if (j < cblk_h) {
for (i = 0; i < cblk_w; ++i) {
OPJ_UINT32 k;
for (k = j; k < cblk_h; k++) {
t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
t1data ++;
}
} }
tileIndex += tileLineAdvance;
} }
} else { /* if (tccp->qmfbid == 0) */ } else { /* if (tccp->qmfbid == 0) */
for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
/* Change from "natural" order to "zigzag" order of T1 passes */
for (j = 0; j < (cblk_h & ~3U); j += 4) {
for (i = 0; i < cblk_w; ++i) { for (i = 0; i < cblk_w; ++i) {
OPJ_FLOAT32 tmp = ((OPJ_FLOAT32*)tiledp)[tileIndex]; t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
tiledp[tileIndex] = (OPJ_INT32)opj_lrintf((tmp / band->stepsize) * band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
(1 << T1_NMSEDEC_FRACBITS)); t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
tileIndex++; band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
t1data += 4;
}
}
if (j < cblk_h) {
for (i = 0; i < cblk_w; ++i) {
OPJ_UINT32 k;
for (k = j; k < cblk_h; k++) {
t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
* (1 << T1_NMSEDEC_FRACBITS));
t1data ++;
}
} }
tileIndex += tileLineAdvance;
} }
} }
@ -2363,6 +2375,7 @@ static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
OPJ_UINT32 i, j; OPJ_UINT32 i, j;
OPJ_BYTE type = T1_TYPE_MQ; OPJ_BYTE type = T1_TYPE_MQ;
OPJ_FLOAT64 tempwmsedec; OPJ_FLOAT64 tempwmsedec;
OPJ_INT32* datap;
#ifdef EXTRA_DEBUG #ifdef EXTRA_DEBUG
printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n", printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
@ -2372,10 +2385,19 @@ static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
max = 0; max = 0;
for (i = 0; i < t1->w; ++i) { datap = t1->data;
for (j = 0; j < t1->h; ++j) { for (j = 0; j < t1->h; ++j) {
OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]); const OPJ_UINT32 w = t1->w;
max = opj_int_max(max, tmp); for (i = 0; i < w; ++i, ++datap) {
OPJ_INT32 tmp = *datap;
if (tmp < 0) {
OPJ_UINT32 tmp_unsigned;
max = opj_int_max(max, -tmp);
tmp_unsigned = opj_to_smr(tmp);
memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
} else {
max = opj_int_max(max, tmp);
}
} }
} }

View File

@ -198,7 +198,6 @@ typedef struct opj_t1 {
OPJ_UINT32 h; OPJ_UINT32 h;
OPJ_UINT32 datasize; OPJ_UINT32 datasize;
OPJ_UINT32 flagssize; OPJ_UINT32 flagssize;
OPJ_UINT32 data_stride;
OPJ_BOOL encoder; OPJ_BOOL encoder;
/* The 3 variables below are only used by the decoder */ /* The 3 variables below are only used by the decoder */