openjpeg/src/lib/openjp2/t1.c

2371 lines
78 KiB
C
Raw Normal View History

2003-11-27 11:10:17 +01:00
/*
* The copyright in this software is being made available under the 2-clauses
* BSD License, included below. This software may be subject to other third
* party and contributor rights, including patent rights, and no such rights
* are granted under this license.
*
* Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
* Copyright (c) 2002-2014, Professor Benoit Macq
* Copyright (c) 2001-2003, David Janssens
2003-11-27 11:10:17 +01:00
* Copyright (c) 2002-2003, Yannick Verschueren
* Copyright (c) 2003-2007, Francois-Olivier Devaux
* Copyright (c) 2003-2014, Antonin Descampe
2007-01-15 10:55:40 +01:00
* Copyright (c) 2005, Herve Drolon, FreeImage Team
* Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
* Copyright (c) 2012, Carl Hetherington
2017-05-22 18:42:46 +02:00
* Copyright (c) 2017, IntoPIX SA <support@intopix.com>
2003-11-27 11:10:17 +01:00
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "opj_includes.h"
#include "t1_luts.h"
2003-11-27 11:10:17 +01:00
2005-12-08 10:38:47 +01:00
/** @defgroup T1 T1 - Implementation of the tier-1 coding */
/*@{*/
#define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)])
#define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
2005-12-08 10:38:47 +01:00
/** @name Local static functions */
/*@{*/
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
OPJ_UINT32 s, OPJ_UINT32 stride,
OPJ_UINT32 vsc);
2017-05-22 18:42:46 +02:00
/**
Decode significant pass
*/
static INLINE void opj_t1_dec_sigpass_step_raw(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 vsc,
OPJ_UINT32 row);
static INLINE void opj_t1_dec_sigpass_step_mqc(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 row,
OPJ_UINT32 flags_stride,
OPJ_UINT32 vsc);
2005-12-08 10:38:47 +01:00
/**
Encode significant pass
*/
static void opj_t1_enc_sigpass(opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 *nmsedec,
OPJ_BYTE type,
OPJ_UINT32 cblksty);
/**
Decode significant pass
*/
static void opj_t1_dec_sigpass_raw(
opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 cblksty);
2005-12-08 10:38:47 +01:00
/**
Encode refinement pass
*/
static void opj_t1_enc_refpass(opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 *nmsedec,
OPJ_BYTE type);
/**
Decode refinement pass
*/
static void opj_t1_dec_refpass_raw(
opj_t1_t *t1,
OPJ_INT32 bpno);
/**
Decode refinement pass
*/
static INLINE void opj_t1_dec_refpass_step_raw(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 poshalf,
OPJ_UINT32 row);
static INLINE void opj_t1_dec_refpass_step_mqc(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 poshalf,
OPJ_UINT32 row);
2005-12-08 10:38:47 +01:00
/**
Decode clean-up pass
*/
static void opj_t1_dec_clnpass_step(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 row,
OPJ_UINT32 vsc);
2005-12-08 10:38:47 +01:00
/**
Encode clean-up pass
*/
static void opj_t1_enc_clnpass(
opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 *nmsedec,
OPJ_UINT32 cblksty);
static OPJ_FLOAT64 opj_t1_getwmsedec(
OPJ_INT32 nmsedec,
OPJ_UINT32 compno,
OPJ_UINT32 level,
OPJ_UINT32 orient,
OPJ_INT32 bpno,
OPJ_UINT32 qmfbid,
OPJ_FLOAT64 stepsize,
OPJ_UINT32 numcomps,
const OPJ_FLOAT64 * mct_norms,
OPJ_UINT32 mct_numcomps);
static void opj_t1_encode_cblk(opj_t1_t *t1,
opj_tcd_cblk_enc_t* cblk,
OPJ_UINT32 orient,
OPJ_UINT32 compno,
OPJ_UINT32 level,
OPJ_UINT32 qmfbid,
OPJ_FLOAT64 stepsize,
OPJ_UINT32 cblksty,
OPJ_UINT32 numcomps,
opj_tcd_tile_t * tile,
const OPJ_FLOAT64 * mct_norms,
OPJ_UINT32 mct_numcomps);
2005-12-08 10:38:47 +01:00
/**
Decode 1 code-block
@param t1 T1 handle
@param cblk Code-block coding parameters
@param orient
@param roishift Region of interest shifting value
@param cblksty Code-block style
@param p_manager the event manager
@param p_manager_mutex mutex for the event manager
@param check_pterm whether PTERM correct termination should be checked
2005-12-08 10:38:47 +01:00
*/
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
opj_tcd_cblk_dec_t* cblk,
OPJ_UINT32 orient,
OPJ_UINT32 roishift,
OPJ_UINT32 cblksty,
opj_event_mgr_t *p_manager,
opj_mutex_t* p_manager_mutex,
OPJ_BOOL check_pterm);
2005-12-08 10:38:47 +01:00
static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
OPJ_UINT32 w,
OPJ_UINT32 h);
2005-12-08 10:38:47 +01:00
/*@}*/
/*@}*/
/* ----------------------------------------------------------------------- */
2003-11-27 11:10:17 +01:00
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
{
return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
}
static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
OPJ_UINT32 pfX,
OPJ_UINT32 nfX,
OPJ_UINT32 ci)
{
/*
0 pfX T1_CHI_THIS T1_LUT_SGN_W
1 tfX T1_SIGMA_1 T1_LUT_SIG_N
2 nfX T1_CHI_THIS T1_LUT_SGN_E
3 tfX T1_SIGMA_3 T1_LUT_SIG_W
4 fX T1_CHI_(THIS - 1) T1_LUT_SGN_N
5 tfX T1_SIGMA_5 T1_LUT_SIG_E
6 fX T1_CHI_(THIS + 1) T1_LUT_SGN_S
7 tfX T1_SIGMA_7 T1_LUT_SIG_S
*/
OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
T1_SIGMA_7);
lu |= (pfX >> (T1_CHI_THIS_I + (ci * 3U))) & (1U << 0);
lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
if (ci == 0U) {
lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
} else {
lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
}
lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
return lu;
2003-11-27 11:10:17 +01:00
}
static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
{
return lut_ctxno_sc[lu];
2003-11-27 11:10:17 +01:00
}
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
{
OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
return tmp2;
2003-11-27 11:10:17 +01:00
}
static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
{
return lut_spb[lu];
2003-11-27 11:10:17 +01:00
}
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
{
if (bitpos > 0) {
return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
}
return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
2003-11-27 11:10:17 +01:00
}
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
{
if (bitpos > 0) {
return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
}
return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
2003-11-27 11:10:17 +01:00
}
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
/* east */ \
flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
\
/* mark target as significant */ \
flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
\
/* west */ \
flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
\
/* north-west, north, north-east */ \
if (ci == 0U && !(vsc)) { \
opj_flag_t* north = flagsp - (stride); \
*north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
north[-1] |= T1_SIGMA_17; \
north[1] |= T1_SIGMA_15; \
} \
\
/* south-west, south, south-east */ \
if (ci == 3U) { \
opj_flag_t* south = flagsp + (stride); \
*south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
south[-1] |= T1_SIGMA_2; \
south[1] |= T1_SIGMA_0; \
} \
2003-11-27 11:10:17 +01:00
}
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
OPJ_UINT32 s, OPJ_UINT32 stride,
OPJ_UINT32 vsc)
{
opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
}
2017-05-22 18:42:46 +02:00
/**
Encode significant pass
*/
static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
opj_flag_t *flagsp,
2017-05-22 18:42:46 +02:00
OPJ_INT32 *datap,
OPJ_INT32 bpno,
OPJ_INT32 one,
OPJ_INT32 *nmsedec,
OPJ_BYTE type,
OPJ_UINT32 ci,
OPJ_UINT32 vsc)
{
2017-05-23 14:55:45 +02:00
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
OPJ_UINT32 const flags = *flagsp;
2017-05-22 18:42:46 +02:00
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
(flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
2017-05-22 18:42:46 +02:00
v = opj_int_abs(*datap) & one ? 1 : 0;
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " ctxt1=%d\n", ctxt1);
#endif
opj_mqc_setcurctx(mqc, ctxt1);
if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */
2017-05-22 18:42:46 +02:00
opj_mqc_bypass_enc(mqc, v);
} else {
2017-05-22 18:42:46 +02:00
opj_mqc_encode(mqc, v);
}
if (v) {
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
*flagsp,
flagsp[-1], flagsp[1],
ci);
OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
2017-06-17 14:09:31 +02:00
v = *datap < 0 ? 1U : 0U;
*nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
2017-05-23 14:55:45 +02:00
(OPJ_UINT32)bpno);
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " ctxt2=%d\n", ctxt2);
#endif
opj_mqc_setcurctx(mqc, ctxt2);
if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */
opj_mqc_bypass_enc(mqc, v);
} else {
OPJ_UINT32 spb = opj_t1_getspb(lu);
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " spb=%d\n", spb);
#endif
opj_mqc_encode(mqc, v ^ spb);
}
opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
}
2017-05-22 18:42:46 +02:00
*flagsp |= T1_PI_THIS << (ci * 3U);
}
2003-11-27 11:10:17 +01:00
}
static INLINE void opj_t1_dec_sigpass_step_raw(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 vsc,
OPJ_UINT32 ci)
{
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* RAW component */
OPJ_UINT32 const flags = *flagsp;
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
(flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
if (opj_mqc_raw_decode(mqc)) {
v = opj_mqc_raw_decode(mqc);
*datap = v ? -oneplushalf : oneplushalf;
opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
}
*flagsp |= T1_PI_THIS << (ci * 3U);
}
}
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
data_stride, ci, mqc, curctx, \
v, a, c, ct, oneplushalf, vsc) \
{ \
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
(flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
opj_t1_setcurctx(curctx, ctxt1); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
if (v) { \
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
flags, \
flagsp[-1], flagsp[1], \
ci); \
OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
OPJ_UINT32 spb = opj_t1_getspb(lu); \
opj_t1_setcurctx(curctx, ctxt2); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
v = v ^ spb; \
data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
} \
flags |= T1_PI_THIS << (ci * 3U); \
} \
}
static INLINE void opj_t1_dec_sigpass_step_mqc(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 ci,
OPJ_UINT32 flags_stride,
OPJ_UINT32 vsc)
{
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
0, ci, mqc, mqc->curctx,
v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
}
static void opj_t1_enc_sigpass(opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 *nmsedec,
OPJ_BYTE type,
OPJ_UINT32 cblksty
)
{
OPJ_UINT32 i, k;
OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
opj_flag_t* f = &T1_FLAGS(0, 0);
OPJ_UINT32 const extra = 2;
*nmsedec = 0;
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_SIG
fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
#endif
for (k = 0; k < (t1->h & ~3U); k += 4) {
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " k=%d\n", k);
#endif
for (i = 0; i < t1->w; ++i) {
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " i=%d\n", i);
#endif
if (*f == 0U) {
/* Nothing to do for any of the 4 data points */
f++;
continue;
}
opj_t1_enc_sigpass_step(
t1,
f,
2017-05-22 18:42:46 +02:00
&t1->data[((k + 0) * t1->data_stride) + i],
bpno,
one,
nmsedec,
2017-05-22 18:42:46 +02:00
type,
0, cblksty & J2K_CCP_CBLKSTY_VSC);
2017-05-22 18:42:46 +02:00
opj_t1_enc_sigpass_step(
t1,
f,
&t1->data[((k + 1) * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
1, 0);
opj_t1_enc_sigpass_step(
t1,
f,
&t1->data[((k + 2) * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
2, 0);
opj_t1_enc_sigpass_step(
t1,
f,
&t1->data[((k + 3) * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
3, 0);
++f;
}
f += extra;
}
2017-05-22 18:42:46 +02:00
if (k < t1->h) {
OPJ_UINT32 j;
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " k=%d\n", k);
#endif
for (i = 0; i < t1->w; ++i) {
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " i=%d\n", i);
#endif
if (*f == 0U) {
/* Nothing to do for any of the 4 data points */
f++;
continue;
}
for (j = k; j < t1->h; ++j) {
opj_t1_enc_sigpass_step(
t1,
2017-05-22 18:42:46 +02:00
f,
&t1->data[(j * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
2017-05-22 18:42:46 +02:00
j - k,
(j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
}
2017-05-22 18:42:46 +02:00
++f;
}
}
2003-11-27 11:10:17 +01:00
}
static void opj_t1_dec_sigpass_raw(
opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 cblksty)
{
OPJ_INT32 one, half, oneplushalf;
OPJ_UINT32 i, j, k;
OPJ_INT32 *data = t1->data;
opj_flag_t *flagsp = &T1_FLAGS(0, 0);
const OPJ_UINT32 l_w = t1->w;
one = 1 << bpno;
half = one >> 1;
oneplushalf = one | half;
for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
opj_flag_t flags = *flagsp;
if (flags != 0) {
opj_t1_dec_sigpass_step_raw(
t1,
flagsp,
data,
oneplushalf,
cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
0U);
opj_t1_dec_sigpass_step_raw(
t1,
flagsp,
data + l_w,
oneplushalf,
OPJ_FALSE, /* vsc */
1U);
opj_t1_dec_sigpass_step_raw(
t1,
flagsp,
data + 2 * l_w,
oneplushalf,
OPJ_FALSE, /* vsc */
2U);
opj_t1_dec_sigpass_step_raw(
t1,
flagsp,
data + 3 * l_w,
oneplushalf,
OPJ_FALSE, /* vsc */
3U);
}
}
}
if (k < t1->h) {
for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
for (j = 0; j < t1->h - k; ++j) {
opj_t1_dec_sigpass_step_raw(
t1,
flagsp,
data + j * l_w,
oneplushalf,
cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
j);
}
}
}
}
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
OPJ_INT32 one, half, oneplushalf; \
OPJ_UINT32 i, j, k; \
register OPJ_INT32 *data = t1->data; \
register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \
half = one >> 1; \
oneplushalf = one | half; \
for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
opj_flag_t flags = *flagsp; \
if( flags != 0 ) { \
opj_t1_dec_sigpass_step_mqc_macro( \
flags, flagsp, flags_stride, data, \
l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
opj_t1_dec_sigpass_step_mqc_macro( \
flags, flagsp, flags_stride, data, \
l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
opj_t1_dec_sigpass_step_mqc_macro( \
flags, flagsp, flags_stride, data, \
l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
opj_t1_dec_sigpass_step_mqc_macro( \
flags, flagsp, flags_stride, data, \
l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
*flagsp = flags; \
} \
} \
} \
UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
if( k < h ) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
for (j = 0; j < h - k; ++j) { \
opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
data + j * l_w, oneplushalf, j, flags_stride, vsc); \
} \
} \
} \
}
static void opj_t1_dec_sigpass_mqc_64x64_novsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
}
static void opj_t1_dec_sigpass_mqc_64x64_vsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
}
static void opj_t1_dec_sigpass_mqc_generic_novsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
t1->w + 2U);
}
static void opj_t1_dec_sigpass_mqc_generic_vsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
t1->w + 2U);
}
static void opj_t1_dec_sigpass_mqc(
opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 cblksty)
{
if (t1->w == 64 && t1->h == 64) {
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
} else {
opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
}
} else {
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
} else {
opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
}
}
}
2003-11-27 11:10:17 +01:00
2017-05-22 18:42:46 +02:00
/**
Encode refinement pass step
*/
static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
opj_flag_t *flagsp,
2017-05-22 18:42:46 +02:00
OPJ_INT32 *datap,
OPJ_INT32 bpno,
OPJ_INT32 one,
OPJ_INT32 *nmsedec,
OPJ_BYTE type,
OPJ_UINT32 ci)
{
2017-05-23 14:55:45 +02:00
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
2017-05-22 18:42:46 +02:00
OPJ_UINT32 const shift_flags =
(*flagsp >> (ci * 3U));
if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
*nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
2017-05-23 14:55:45 +02:00
(OPJ_UINT32)bpno);
2017-05-22 18:42:46 +02:00
v = opj_int_abs(*datap) & one ? 1 : 0;
#ifdef DEBUG_ENC_REF
fprintf(stderr, " ctxt=%d\n", ctxt);
#endif
opj_mqc_setcurctx(mqc, ctxt);
if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */
2017-05-22 18:42:46 +02:00
opj_mqc_bypass_enc(mqc, v);
} else {
2017-05-22 18:42:46 +02:00
opj_mqc_encode(mqc, v);
}
2017-05-22 18:42:46 +02:00
*flagsp |= T1_MU_THIS << (ci * 3U);
}
2003-11-27 11:10:17 +01:00
}
static INLINE void opj_t1_dec_refpass_step_raw(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 poshalf,
OPJ_UINT32 ci)
{
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* RAW component */
if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
(T1_SIGMA_THIS << (ci * 3U))) {
v = opj_mqc_raw_decode(mqc);
*datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
*flagsp |= T1_MU_THIS << (ci * 3U);
}
}
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
mqc, curctx, v, a, c, ct, poshalf) \
{ \
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
(T1_SIGMA_THIS << (ci * 3U))) { \
OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
opj_t1_setcurctx(curctx, ctxt); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
flags |= T1_MU_THIS << (ci * 3U); \
} \
}
static INLINE void opj_t1_dec_refpass_step_mqc(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 poshalf,
OPJ_UINT32 ci)
{
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
mqc, mqc->curctx, v, mqc->a, mqc->c,
mqc->ct, poshalf);
}
2003-11-27 11:10:17 +01:00
static void opj_t1_enc_refpass(
opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 *nmsedec,
OPJ_BYTE type)
{
OPJ_UINT32 i, k;
const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
opj_flag_t* f = &T1_FLAGS(0, 0);
const OPJ_UINT32 extra = 2U;
*nmsedec = 0;
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_REF
fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
#endif
for (k = 0; k < (t1->h & ~3U); k += 4) {
#ifdef DEBUG_ENC_REF
fprintf(stderr, " k=%d\n", k);
#endif
for (i = 0; i < t1->w; ++i) {
#ifdef DEBUG_ENC_REF
fprintf(stderr, " i=%d\n", i);
#endif
if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
/* none significant */
f++;
continue;
}
if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
/* all processed by sigpass */
f++;
continue;
}
opj_t1_enc_refpass_step(
t1,
f,
&t1->data[((k + 0) * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
0);
opj_t1_enc_refpass_step(
t1,
f,
2017-05-22 18:42:46 +02:00
&t1->data[((k + 1) * t1->data_stride) + i],
bpno,
one,
nmsedec,
2017-05-22 18:42:46 +02:00
type,
1);
2017-05-22 18:42:46 +02:00
opj_t1_enc_refpass_step(
t1,
f,
&t1->data[((k + 2) * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
2);
2017-05-22 18:42:46 +02:00
opj_t1_enc_refpass_step(
t1,
f,
&t1->data[((k + 3) * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
3);
++f;
}
f += extra;
}
2017-05-22 18:42:46 +02:00
if (k < t1->h) {
OPJ_UINT32 j;
#ifdef DEBUG_ENC_REF
fprintf(stderr, " k=%d\n", k);
#endif
for (i = 0; i < t1->w; ++i) {
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_REF
fprintf(stderr, " i=%d\n", i);
#endif
if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
/* none significant */
f++;
continue;
}
for (j = k; j < t1->h; ++j) {
opj_t1_enc_refpass_step(
t1,
2017-05-22 18:42:46 +02:00
f,
&t1->data[(j * t1->data_stride) + i],
bpno,
one,
nmsedec,
type,
j - k);
}
2017-05-22 18:42:46 +02:00
++f;
}
}
2003-11-27 11:10:17 +01:00
}
static void opj_t1_dec_refpass_raw(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
OPJ_INT32 one, poshalf;
OPJ_UINT32 i, j, k;
OPJ_INT32 *data = t1->data;
opj_flag_t *flagsp = &T1_FLAGS(0, 0);
const OPJ_UINT32 l_w = t1->w;
one = 1 << bpno;
poshalf = one >> 1;
for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
opj_flag_t flags = *flagsp;
if (flags != 0) {
opj_t1_dec_refpass_step_raw(
t1,
flagsp,
data,
poshalf,
0U);
opj_t1_dec_refpass_step_raw(
t1,
flagsp,
data + l_w,
poshalf,
1U);
opj_t1_dec_refpass_step_raw(
t1,
flagsp,
data + 2 * l_w,
poshalf,
2U);
opj_t1_dec_refpass_step_raw(
t1,
flagsp,
data + 3 * l_w,
poshalf,
3U);
}
}
}
if (k < t1->h) {
for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
for (j = 0; j < t1->h - k; ++j) {
opj_t1_dec_refpass_step_raw(
t1,
flagsp,
data + j * l_w,
poshalf,
j);
}
}
}
}
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
{ \
OPJ_INT32 one, poshalf; \
OPJ_UINT32 i, j, k; \
register OPJ_INT32 *data = t1->data; \
register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \
poshalf = one >> 1; \
for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
opj_flag_t flags = *flagsp; \
if( flags != 0 ) { \
opj_t1_dec_refpass_step_mqc_macro( \
flags, data, l_w, 0, \
mqc, curctx, v, a, c, ct, poshalf); \
opj_t1_dec_refpass_step_mqc_macro( \
flags, data, l_w, 1, \
mqc, curctx, v, a, c, ct, poshalf); \
opj_t1_dec_refpass_step_mqc_macro( \
flags, data, l_w, 2, \
mqc, curctx, v, a, c, ct, poshalf); \
opj_t1_dec_refpass_step_mqc_macro( \
flags, data, l_w, 3, \
mqc, curctx, v, a, c, ct, poshalf); \
*flagsp = flags; \
} \
} \
} \
UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
if( k < h ) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
for (j = 0; j < h - k; ++j) { \
opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
} \
} \
} \
}
static void opj_t1_dec_refpass_mqc_64x64(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
}
static void opj_t1_dec_refpass_mqc_generic(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
}
static void opj_t1_dec_refpass_mqc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
if (t1->w == 64 && t1->h == 64) {
opj_t1_dec_refpass_mqc_64x64(t1, bpno);
} else {
opj_t1_dec_refpass_mqc_generic(t1, bpno);
}
}
2003-11-27 11:10:17 +01:00
2017-05-22 18:42:46 +02:00
/**
Encode clean-up pass step
*/
static void opj_t1_enc_clnpass_step(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 bpno,
OPJ_INT32 one,
OPJ_INT32 *nmsedec,
OPJ_UINT32 agg,
OPJ_UINT32 runlen,
2017-05-23 00:40:30 +02:00
OPJ_UINT32 lim,
2017-05-22 18:42:46 +02:00
OPJ_UINT32 cblksty)
{
2017-05-23 14:55:45 +02:00
OPJ_UINT32 v;
OPJ_UINT32 ci;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
2017-05-23 00:40:30 +02:00
const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
if ((*flagsp & check) == check) {
if (runlen == 0) {
*flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
} else if (runlen == 1) {
*flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
} else if (runlen == 2) {
*flagsp &= ~(T1_PI_2 | T1_PI_3);
} else if (runlen == 3) {
*flagsp &= ~(T1_PI_3);
}
2017-05-23 00:40:30 +02:00
return;
}
2003-11-27 11:10:17 +01:00
for (ci = runlen; ci < lim; ++ci) {
2017-05-22 18:42:46 +02:00
OPJ_UINT32 vsc;
opj_flag_t flags;
flags = *flagsp;
if ((agg != 0) && (ci == runlen)) {
goto LABEL_PARTIAL;
}
2017-05-22 18:42:46 +02:00
if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_CLN
printf(" ctxt1=%d\n", ctxt1);
#endif
opj_mqc_setcurctx(mqc, ctxt1);
v = opj_int_abs(*datap) & one ? 1 : 0;
opj_mqc_encode(mqc, v);
if (v) {
2017-05-22 18:42:46 +02:00
OPJ_UINT32 ctxt2, spb;
OPJ_UINT32 lu;
LABEL_PARTIAL:
lu = opj_t1_getctxtno_sc_or_spb_index(
*flagsp,
flagsp[-1], flagsp[1],
ci);
2017-05-23 14:55:45 +02:00
*nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
(OPJ_UINT32)bpno);
ctxt2 = opj_t1_getctxno_sc(lu);
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_CLN
printf(" ctxt2=%d\n", ctxt2);
#endif
opj_mqc_setcurctx(mqc, ctxt2);
2017-06-17 14:09:31 +02:00
v = *datap < 0 ? 1U : 0U;
spb = opj_t1_getspb(lu);
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_CLN
printf(" spb=%d\n", spb);
#endif
opj_mqc_encode(mqc, v ^ spb);
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
}
}
2017-05-22 18:42:46 +02:00
*flagsp &= ~(T1_PI_THIS << (3U * ci));
datap += t1->data_stride;
}
}
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
flags, flagsp, flags_stride, data, \
data_stride, ci, mqc, curctx, \
v, a, c, ct, oneplushalf, vsc) \
{ \
if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
do { \
if( !partial ) { \
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
opj_t1_setcurctx(curctx, ctxt1); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
if( !v ) \
break; \
} \
{ \
OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
flags, flagsp[-1], flagsp[1], \
ci); \
opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
v = v ^ opj_t1_getspb(lu); \
data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
} \
} while(0); \
} \
}
static void opj_t1_dec_clnpass_step(
opj_t1_t *t1,
opj_flag_t *flagsp,
OPJ_INT32 *datap,
OPJ_INT32 oneplushalf,
OPJ_UINT32 ci,
OPJ_UINT32 vsc)
{
OPJ_UINT32 v;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
*flagsp, flagsp, t1->w + 2U, datap,
0, ci, mqc, mqc->curctx,
v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
}
2003-11-27 11:10:17 +01:00
static void opj_t1_enc_clnpass(
opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 *nmsedec,
OPJ_UINT32 cblksty)
{
OPJ_UINT32 i, k;
const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
OPJ_UINT32 agg, runlen;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
*nmsedec = 0;
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_CLN
printf("enc_clnpass: bpno=%d\n", bpno);
#endif
2017-05-23 00:40:30 +02:00
for (k = 0; k < (t1->h & ~3U); k += 4) {
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_CLN
printf(" k=%d\n", k);
#endif
for (i = 0; i < t1->w; ++i) {
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_CLN
printf(" i=%d\n", i);
#endif
agg = !(T1_FLAGS(i, k));
2017-05-22 18:42:46 +02:00
#ifdef DEBUG_ENC_CLN
printf(" agg=%d\n", agg);
#endif
if (agg) {
for (runlen = 0; runlen < 4; ++runlen) {
if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
break;
}
}
opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
opj_mqc_encode(mqc, runlen != 4);
if (runlen == 4) {
continue;
}
opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
opj_mqc_encode(mqc, runlen >> 1);
opj_mqc_encode(mqc, runlen & 1);
} else {
runlen = 0;
}
opj_t1_enc_clnpass_step(
t1,
&T1_FLAGS(i, k),
&t1->data[((k + runlen) * t1->data_stride) + i],
bpno,
one,
nmsedec,
agg,
runlen,
2017-05-23 00:40:30 +02:00
4U,
cblksty);
}
}
if (k < t1->h) {
agg = 0;
runlen = 0;
#ifdef DEBUG_ENC_CLN
printf(" k=%d\n", k);
#endif
for (i = 0; i < t1->w; ++i) {
#ifdef DEBUG_ENC_CLN
printf(" i=%d\n", i);
printf(" agg=%d\n", agg);
#endif
opj_t1_enc_clnpass_step(
t1,
&T1_FLAGS(i, k),
2017-05-23 00:40:30 +02:00
&t1->data[((k + runlen) * t1->data_stride) + i],
bpno,
one,
nmsedec,
agg,
runlen,
t1->h - k,
2017-05-22 18:42:46 +02:00
cblksty);
}
}
2003-11-27 11:10:17 +01:00
}
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
OPJ_INT32 one, half, oneplushalf; \
OPJ_UINT32 runlen; \
OPJ_UINT32 i, j, k; \
const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \
register OPJ_INT32 *data = t1->data; \
register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \
half = one >> 1; \
oneplushalf = one | half; \
for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
opj_flag_t flags = *flagsp; \
if (flags == 0) { \
OPJ_UINT32 partial = OPJ_TRUE; \
opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
if (!v) { \
continue; \
} \
opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
runlen = (runlen << 1) | v; \
switch(runlen) { \
case 0: \
opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
flags, flagsp, flags_stride, data, \
l_w, 0, mqc, curctx, \
v, a, c, ct, oneplushalf, vsc); \
partial = OPJ_FALSE; \
2017-06-17 14:09:31 +02:00
/* FALLTHRU */ \
case 1: \
opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
flags, flagsp, flags_stride, data, \
l_w, 1, mqc, curctx, \
v, a, c, ct, oneplushalf, OPJ_FALSE); \
partial = OPJ_FALSE; \
2017-06-17 14:09:31 +02:00
/* FALLTHRU */ \
case 2: \
opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
flags, flagsp, flags_stride, data, \
l_w, 2, mqc, curctx, \
v, a, c, ct, oneplushalf, OPJ_FALSE); \
partial = OPJ_FALSE; \
2017-06-17 14:09:31 +02:00
/* FALLTHRU */ \
case 3: \
opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
flags, flagsp, flags_stride, data, \
l_w, 3, mqc, curctx, \
v, a, c, ct, oneplushalf, OPJ_FALSE); \
break; \
} \
} else { \
opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
flags, flagsp, flags_stride, data, \
l_w, 0, mqc, curctx, \
v, a, c, ct, oneplushalf, vsc); \
opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
flags, flagsp, flags_stride, data, \
l_w, 1, mqc, curctx, \
v, a, c, ct, oneplushalf, OPJ_FALSE); \
opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
flags, flagsp, flags_stride, data, \
l_w, 2, mqc, curctx, \
v, a, c, ct, oneplushalf, OPJ_FALSE); \
opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
flags, flagsp, flags_stride, data, \
l_w, 3, mqc, curctx, \
v, a, c, ct, oneplushalf, OPJ_FALSE); \
} \
*flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
} \
} \
UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
if( k < h ) { \
for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
for (j = 0; j < h - k; ++j) { \
opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
} \
*flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
} \
} \
}
static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
{
if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
opj_mqc_t* mqc = &(t1->mqc);
OPJ_UINT32 v, v2;
opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
opj_mqc_decode(v, mqc);
opj_mqc_decode(v2, mqc);
v = (v << 1) | v2;
opj_mqc_decode(v2, mqc);
v = (v << 1) | v2;
opj_mqc_decode(v2, mqc);
v = (v << 1) | v2;
/*
if (v!=0xa) {
opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
}
*/
}
}
static void opj_t1_dec_clnpass_64x64_novsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
}
static void opj_t1_dec_clnpass_64x64_vsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
}
static void opj_t1_dec_clnpass_generic_novsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
t1->w + 2U);
}
static void opj_t1_dec_clnpass_generic_vsc(
opj_t1_t *t1,
OPJ_INT32 bpno)
{
opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
t1->w + 2U);
}
static void opj_t1_dec_clnpass(
opj_t1_t *t1,
OPJ_INT32 bpno,
OPJ_INT32 cblksty)
{
if (t1->w == 64 && t1->h == 64) {
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
} else {
opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
}
} else {
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
opj_t1_dec_clnpass_generic_vsc(t1, bpno);
} else {
opj_t1_dec_clnpass_generic_novsc(t1, bpno);
}
}
opj_t1_dec_clnpass_check_segsym(t1, cblksty);
}
2003-11-27 11:10:17 +01:00
/** mod fixed_quality */
static OPJ_FLOAT64 opj_t1_getwmsedec(
OPJ_INT32 nmsedec,
OPJ_UINT32 compno,
OPJ_UINT32 level,
OPJ_UINT32 orient,
OPJ_INT32 bpno,
OPJ_UINT32 qmfbid,
OPJ_FLOAT64 stepsize,
OPJ_UINT32 numcomps,
const OPJ_FLOAT64 * mct_norms,
OPJ_UINT32 mct_numcomps)
{
OPJ_FLOAT64 w1 = 1, w2, wmsedec;
OPJ_ARG_NOT_USED(numcomps);
if (mct_norms && (compno < mct_numcomps)) {
w1 = mct_norms[compno];
}
if (qmfbid == 1) {
w2 = opj_dwt_getnorm(level, orient);
} else { /* if (qmfbid == 0) */
w2 = opj_dwt_getnorm_real(level, orient);
}
wmsedec = w1 * w2 * stepsize * (1 << bpno);
wmsedec *= wmsedec * nmsedec / 8192.0;
return wmsedec;
}
static OPJ_BOOL opj_t1_allocate_buffers(
opj_t1_t *t1,
OPJ_UINT32 w,
OPJ_UINT32 h)
{
OPJ_UINT32 flagssize;
OPJ_UINT32 flags_stride;
/* No risk of overflow. Prior checks ensure those assert are met */
/* They are per the specification */
assert(w <= 1024);
assert(h <= 1024);
assert(w * h <= 4096);
/* encoder uses tile buffer, so no need to allocate */
if (!t1->encoder) {
OPJ_UINT32 datasize = w * h;
if (datasize > (size_t)t1->datasize) {
opj_aligned_free(t1->data);
t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
if (!t1->data) {
/* FIXME event manager error callback */
return OPJ_FALSE;
}
t1->datasize = datasize;
}
/* memset first arg is declared to never be null by gcc */
if (t1->data != NULL) {
memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
}
}
flags_stride = w + 2U; /* can't be 0U */
flagssize = (h + 3U) / 4U + 2U;
flagssize *= flags_stride;
{
opj_flag_t* p;
OPJ_UINT32 x;
OPJ_UINT32 flags_height = (h + 3U) / 4U;
if (flagssize > t1->flagssize) {
opj_aligned_free(t1->flags);
t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
opj_flag_t));
if (!t1->flags) {
/* FIXME event manager error callback */
return OPJ_FALSE;
}
}
t1->flagssize = flagssize;
memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
p = &t1->flags[0];
for (x = 0; x < flags_stride; ++x) {
/* magic value to hopefully stop any passes being interested in this entry */
*p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
}
p = &t1->flags[((flags_height + 1) * flags_stride)];
for (x = 0; x < flags_stride; ++x) {
/* magic value to hopefully stop any passes being interested in this entry */
*p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
}
if (h % 4) {
OPJ_UINT32 v = 0;
p = &t1->flags[((flags_height) * flags_stride)];
if (h % 4 == 1) {
v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
} else if (h % 4 == 2) {
v |= T1_PI_2 | T1_PI_3;
} else if (h % 4 == 3) {
v |= T1_PI_3;
}
for (x = 0; x < flags_stride; ++x) {
*p++ = v;
}
}
}
t1->w = w;
t1->h = h;
return OPJ_TRUE;
}
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/**
* Creates a new Tier 1 handle
* and initializes the look-up tables of the Tier-1 coder/decoder
* @return a new T1 handle if successful, returns NULL otherwise
*/
opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
{
opj_t1_t *l_t1 = 00;
l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
if (!l_t1) {
return 00;
}
l_t1->encoder = isEncoder;
return l_t1;
}
/**
* Destroys a previously created T1 handle
*
* @param p_t1 Tier 1 handle to destroy
*/
void opj_t1_destroy(opj_t1_t *p_t1)
{
if (! p_t1) {
return;
}
/* encoder uses tile buffer, so no need to free */
if (!p_t1->encoder && p_t1->data) {
opj_aligned_free(p_t1->data);
p_t1->data = 00;
}
if (p_t1->flags) {
opj_aligned_free(p_t1->flags);
p_t1->flags = 00;
}
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
opj_free(p_t1->cblkdatabuffer);
Decoding: do not allocate memory for the codestream of each codeblock Currently we allocate at least 8192 bytes for each codeblock, and copy the relevant parts of the codestream in that per-codeblock buffer as we decode packets. As the whole codestream for the tile is ingested in memory and alive during the decoding, we can directly point to it instead of copying. But to do that, we need an intermediate concept, a 'chunk' of code-stream segment, given that segments may be made of data at different places in the code-stream when quality layers are used. With that change, the decoding of MAPA_005.jp2 goes down from the previous improvement of 2.7 GB down to 1.9 GB. New profile: n4: 1885648469 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E78287: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D7B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E7272C: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BDD9: opj_j2k_read_tile_header (j2k.c:8618) n1: 1610689344 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 1610689344 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 1610689344 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BBF0: opj_j2k_read_tile_header (j2k.c:4685) n1: 219232541 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 219232541 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 219232541 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 39822000 0x4E727A9: opj_tcd_init_decode_tile (tcd.c:1219) n1: 39822000 0x4E4BDD9: opj_j2k_read_tile_header (j2k.c:8618) n1: 39822000 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 39822000 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 39822000 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 39822000 0x40374E: main (opj_decompress.c:1459) n0: 15904584 in 52 places, all below massif's threshold (1.00%)
2017-07-06 16:11:11 +02:00
opj_free(p_t1);
}
typedef struct {
2016-05-25 16:38:44 +02:00
OPJ_UINT32 resno;
opj_tcd_cblk_dec_t* cblk;
opj_tcd_band_t* band;
opj_tcd_tilecomp_t* tilec;
opj_tccp_t* tccp;
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
OPJ_BOOL mustuse_cblkdatabuffer;
2016-05-25 16:38:44 +02:00
volatile OPJ_BOOL* pret;
opj_event_mgr_t *p_manager;
opj_mutex_t* p_manager_mutex;
OPJ_BOOL check_pterm;
2016-05-25 16:38:44 +02:00
} opj_t1_cblk_decode_processing_job_t;
static void opj_t1_destroy_wrapper(void* t1)
{
opj_t1_destroy((opj_t1_t*) t1);
2016-05-25 16:38:44 +02:00
}
static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
{
opj_tcd_cblk_dec_t* cblk;
opj_tcd_band_t* band;
opj_tcd_tilecomp_t* tilec;
opj_tccp_t* tccp;
OPJ_INT32* OPJ_RESTRICT datap;
2016-05-25 16:38:44 +02:00
OPJ_UINT32 cblk_w, cblk_h;
OPJ_INT32 x, y;
OPJ_UINT32 i, j;
opj_t1_cblk_decode_processing_job_t* job;
opj_t1_t* t1;
OPJ_UINT32 resno;
OPJ_UINT32 tile_w;
job = (opj_t1_cblk_decode_processing_job_t*) user_data;
resno = job->resno;
cblk = job->cblk;
band = job->band;
tilec = job->tilec;
tccp = job->tccp;
tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
-
tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
2016-05-25 16:38:44 +02:00
if (!*(job->pret)) {
2016-05-25 16:38:44 +02:00
opj_free(job);
return;
}
t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
if (t1 == NULL) {
t1 = opj_t1_create(OPJ_FALSE);
opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2016-05-25 16:38:44 +02:00
}
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
2016-05-25 16:38:44 +02:00
if (OPJ_FALSE == opj_t1_decode_cblk(
t1,
cblk,
band->bandno,
(OPJ_UINT32)tccp->roishift,
tccp->cblksty,
job->p_manager,
job->p_manager_mutex,
job->check_pterm)) {
*(job->pret) = OPJ_FALSE;
opj_free(job);
return;
2016-05-25 16:38:44 +02:00
}
x = cblk->x0 - band->x0;
y = cblk->y0 - band->y0;
if (band->bandno & 1) {
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
x += pres->x1 - pres->x0;
}
if (band->bandno & 2) {
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
y += pres->y1 - pres->y0;
}
datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
2016-05-25 16:38:44 +02:00
cblk_w = t1->w;
cblk_h = t1->h;
if (tccp->roishift) {
if (tccp->roishift >= 31) {
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
datap[(j * cblk_w) + i] = 0;
}
}
} else {
OPJ_INT32 thresh = 1 << tccp->roishift;
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
OPJ_INT32 val = datap[(j * cblk_w) + i];
OPJ_INT32 mag = abs(val);
if (mag >= thresh) {
mag >>= tccp->roishift;
datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
}
2016-05-25 16:38:44 +02:00
}
}
}
}
/* Both can be non NULL if for example decoding a full tile and then */
/* partially a tile. In which case partial decoding should be the */
/* priority */
assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
if (cblk->decoded_data) {
if (tccp->qmfbid == 1) {
for (j = 0; j < cblk_h; ++j) {
i = 0;
for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
datap[(j * cblk_w) + i + 0U] = tmp0 / 2;
datap[(j * cblk_w) + i + 1U] = tmp1 / 2;
datap[(j * cblk_w) + i + 2U] = tmp2 / 2;
datap[(j * cblk_w) + i + 3U] = tmp3 / 2;
}
for (; i < cblk_w; ++i) {
datap[(j * cblk_w) + i] /= 2;
}
}
} else { /* if (tccp->qmfbid == 0) */
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
memcpy(datap, &tmp, sizeof(tmp));
datap++;
}
}
}
} else if (tccp->qmfbid == 1) {
OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w +
(size_t)x];
2016-05-25 16:38:44 +02:00
for (j = 0; j < cblk_h; ++j) {
i = 0;
for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2;
((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2;
((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2;
((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2;
2016-05-25 16:38:44 +02:00
}
for (; i < cblk_w; ++i) {
OPJ_INT32 tmp = datap[(j * cblk_w) + i];
((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2;
2016-05-25 16:38:44 +02:00
}
}
} else { /* if (tccp->qmfbid == 0) */
OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y *
tile_w + (size_t)x];
2016-05-25 16:38:44 +02:00
for (j = 0; j < cblk_h; ++j) {
OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
2016-05-25 16:38:44 +02:00
for (i = 0; i < cblk_w; ++i) {
OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize;
2016-05-25 16:38:44 +02:00
*tiledp2 = tmp;
datap++;
tiledp2++;
}
tiledp += tile_w;
}
}
opj_free(job);
}
void opj_t1_decode_cblks(opj_tcd_t* tcd,
volatile OPJ_BOOL* pret,
opj_tcd_tilecomp_t* tilec,
opj_tccp_t* tccp,
opj_event_mgr_t *p_manager,
opj_mutex_t* p_manager_mutex,
OPJ_BOOL check_pterm
)
{
opj_thread_pool_t* tp = tcd->thread_pool;
OPJ_UINT32 resno, bandno, precno, cblkno;
for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
opj_tcd_resolution_t* res = &tilec->resolutions[resno];
for (bandno = 0; bandno < res->numbands; ++bandno) {
opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
for (precno = 0; precno < res->pw * res->ph; ++precno) {
opj_tcd_precinct_t* precinct = &band->precincts[precno];
if (!opj_tcd_is_subband_area_of_interest(tcd,
tilec->compno,
resno,
band->bandno,
(OPJ_UINT32)precinct->x0,
(OPJ_UINT32)precinct->y0,
(OPJ_UINT32)precinct->x1,
(OPJ_UINT32)precinct->y1)) {
for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
if (cblk->decoded_data) {
#ifdef DEBUG_VERBOSE
printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
cblk->x0, cblk->y0, resno, bandno);
#endif
opj_free(cblk->decoded_data);
cblk->decoded_data = NULL;
}
}
continue;
}
for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
2016-05-25 16:38:44 +02:00
opj_t1_cblk_decode_processing_job_t* job;
if (!opj_tcd_is_subband_area_of_interest(tcd,
tilec->compno,
resno,
band->bandno,
(OPJ_UINT32)cblk->x0,
(OPJ_UINT32)cblk->y0,
(OPJ_UINT32)cblk->x1,
(OPJ_UINT32)cblk->y1)) {
if (cblk->decoded_data) {
#ifdef DEBUG_VERBOSE
printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
cblk->x0, cblk->y0, resno, bandno);
#endif
opj_free(cblk->decoded_data);
cblk->decoded_data = NULL;
}
continue;
}
if (!tcd->whole_tile_decoding) {
OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
if (cblk->decoded_data != NULL) {
#ifdef DEBUG_VERBOSE
printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
cblk->x0, cblk->y0, resno, bandno);
#endif
continue;
}
if (cblk_w == 0 || cblk_h == 0) {
continue;
}
#ifdef DEBUG_VERBOSE
printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
cblk->x0, cblk->y0, resno, bandno);
#endif
/* Zero-init required */
cblk->decoded_data = opj_calloc(1, cblk_w * cblk_h * sizeof(OPJ_INT32));
if (cblk->decoded_data == NULL) {
if (p_manager_mutex) {
opj_mutex_lock(p_manager_mutex);
}
opj_event_msg(p_manager, EVT_ERROR,
"Cannot allocate cblk->decoded_data\n");
if (p_manager_mutex) {
opj_mutex_unlock(p_manager_mutex);
}
*pret = OPJ_FALSE;
return;
}
} else if (cblk->decoded_data) {
/* Not sure if that code path can happen, but better be */
/* safe than sorry */
opj_free(cblk->decoded_data);
cblk->decoded_data = NULL;
}
job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
sizeof(opj_t1_cblk_decode_processing_job_t));
if (!job) {
*pret = OPJ_FALSE;
return;
}
2016-05-25 16:38:44 +02:00
job->resno = resno;
job->cblk = cblk;
job->band = band;
job->tilec = tilec;
job->tccp = tccp;
job->pret = pret;
job->p_manager_mutex = p_manager_mutex;
job->p_manager = p_manager;
job->check_pterm = check_pterm;
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
if (!(*pret)) {
2016-05-25 16:38:44 +02:00
return;
}
} /* cblkno */
} /* precno */
} /* bandno */
} /* resno */
2016-05-25 16:38:44 +02:00
return;
}
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
opj_tcd_cblk_dec_t* cblk,
OPJ_UINT32 orient,
OPJ_UINT32 roishift,
OPJ_UINT32 cblksty,
opj_event_mgr_t *p_manager,
opj_mutex_t* p_manager_mutex,
OPJ_BOOL check_pterm)
{
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
OPJ_INT32 bpno_plus_one;
OPJ_UINT32 passtype;
OPJ_UINT32 segno, passno;
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
OPJ_BYTE* cblkdata = NULL;
OPJ_UINT32 cblkdataindex = 0;
OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
OPJ_INT32* original_t1_data = NULL;
mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
if (!opj_t1_allocate_buffers(
t1,
(OPJ_UINT32)(cblk->x1 - cblk->x0),
(OPJ_UINT32)(cblk->y1 - cblk->y0))) {
return OPJ_FALSE;
}
bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
if (bpno_plus_one >= 31) {
if (p_manager_mutex) {
opj_mutex_lock(p_manager_mutex);
}
opj_event_msg(p_manager, EVT_WARNING,
"opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
bpno_plus_one);
if (p_manager_mutex) {
opj_mutex_unlock(p_manager_mutex);
}
return OPJ_FALSE;
}
passtype = 2;
opj_mqc_resetstates(mqc);
opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
/* Even if we have a single chunk, in multi-threaded decoding */
/* the insertion of our synthetic marker might potentially override */
/* valid codestream of other codeblocks decoded in parallel. */
if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
OPJ_UINT32 i;
OPJ_UINT32 cblk_len;
/* Compute whole codeblock length from chunk lengths */
cblk_len = 0;
for (i = 0; i < cblk->numchunks; i++) {
cblk_len += cblk->chunks[i].len;
}
/* Allocate temporary memory if needed */
if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
if (cblkdata == NULL) {
return OPJ_FALSE;
}
t1->cblkdatabuffer = cblkdata;
memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
}
/* Concatenate all chunks */
cblkdata = t1->cblkdatabuffer;
cblk_len = 0;
for (i = 0; i < cblk->numchunks; i++) {
memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
cblk_len += cblk->chunks[i].len;
}
} else if (cblk->numchunks == 1) {
cblkdata = cblk->chunks[0].data;
}
/* For subtile decoding, directly decode in the decoded_data buffer of */
/* the code-block. Hack t1->data to point to it, and restore it later */
if (cblk->decoded_data) {
original_t1_data = t1->data;
t1->data = cblk->decoded_data;
}
for (segno = 0; segno < cblk->real_num_segs; ++segno) {
opj_tcd_seg_t *seg = &cblk->segs[segno];
/* BYPASS mode */
type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
(cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
Decoding: do not allocate memory for the codestream of each codeblock Currently we allocate at least 8192 bytes for each codeblock, and copy the relevant parts of the codestream in that per-codeblock buffer as we decode packets. As the whole codestream for the tile is ingested in memory and alive during the decoding, we can directly point to it instead of copying. But to do that, we need an intermediate concept, a 'chunk' of code-stream segment, given that segments may be made of data at different places in the code-stream when quality layers are used. With that change, the decoding of MAPA_005.jp2 goes down from the previous improvement of 2.7 GB down to 1.9 GB. New profile: n4: 1885648469 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E78287: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D7B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E7272C: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BDD9: opj_j2k_read_tile_header (j2k.c:8618) n1: 1610689344 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 1610689344 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 1610689344 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BBF0: opj_j2k_read_tile_header (j2k.c:4685) n1: 219232541 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 219232541 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 219232541 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 39822000 0x4E727A9: opj_tcd_init_decode_tile (tcd.c:1219) n1: 39822000 0x4E4BDD9: opj_j2k_read_tile_header (j2k.c:8618) n1: 39822000 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 39822000 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 39822000 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 39822000 0x40374E: main (opj_decompress.c:1459) n0: 15904584 in 52 places, all below massif's threshold (1.00%)
2017-07-06 16:11:11 +02:00
if (type == T1_TYPE_RAW) {
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
OPJ_COMMON_CBLK_DATA_EXTRA);
} else {
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
OPJ_COMMON_CBLK_DATA_EXTRA);
}
Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it down to the codeblock itself since segments are filled in sequential order. Limit the number of memory allocation, and decrease slightly the memory usage. On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%)
2017-07-06 19:34:21 +02:00
cblkdataindex += seg->len;
for (passno = 0; (passno < seg->real_num_passes) &&
(bpno_plus_one >= 1); ++passno) {
switch (passtype) {
case 0:
if (type == T1_TYPE_RAW) {
opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
} else {
opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
}
break;
case 1:
if (type == T1_TYPE_RAW) {
opj_t1_dec_refpass_raw(t1, bpno_plus_one);
} else {
opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
}
break;
case 2:
opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
break;
}
if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
opj_mqc_resetstates(mqc);
opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
}
if (++passtype == 3) {
passtype = 0;
bpno_plus_one--;
}
}
opq_mqc_finish_dec(mqc);
}
if (check_pterm) {
if (mqc->bp + 2 < mqc->end) {
if (p_manager_mutex) {
opj_mutex_lock(p_manager_mutex);
}
opj_event_msg(p_manager, EVT_WARNING,
"PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
(int)(mqc->end - mqc->bp) - 2,
(int)(mqc->bp - mqc->start),
(int)(mqc->end - mqc->start));
if (p_manager_mutex) {
opj_mutex_unlock(p_manager_mutex);
}
} else if (mqc->end_of_byte_stream_counter > 2) {
if (p_manager_mutex) {
opj_mutex_lock(p_manager_mutex);
}
opj_event_msg(p_manager, EVT_WARNING,
"PTERM check failure: %d synthetized 0xFF markers read\n",
mqc->end_of_byte_stream_counter);
if (p_manager_mutex) {
opj_mutex_unlock(p_manager_mutex);
}
}
}
/* Restore original t1->data is needed */
if (cblk->decoded_data) {
t1->data = original_t1_data;
}
2015-06-16 04:38:39 +02:00
return OPJ_TRUE;
}
OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
opj_tcd_tile_t *tile,
opj_tcp_t *tcp,
const OPJ_FLOAT64 * mct_norms,
OPJ_UINT32 mct_numcomps
)
{
OPJ_UINT32 compno, resno, bandno, precno, cblkno;
tile->distotile = 0; /* fixed_quality */
for (compno = 0; compno < tile->numcomps; ++compno) {
opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
opj_tccp_t* tccp = &tcp->tccps[compno];
OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
for (resno = 0; resno < tilec->numresolutions; ++resno) {
opj_tcd_resolution_t *res = &tilec->resolutions[resno];
for (bandno = 0; bandno < res->numbands; ++bandno) {
opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
OPJ_INT32 bandconst;
/* Skip empty bands */
if (opj_tcd_is_band_empty(band)) {
continue;
}
bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
for (precno = 0; precno < res->pw * res->ph; ++precno) {
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
OPJ_INT32* OPJ_RESTRICT tiledp;
OPJ_UINT32 cblk_w;
OPJ_UINT32 cblk_h;
OPJ_UINT32 i, j, tileLineAdvance;
size_t tileIndex = 0;
OPJ_INT32 x = cblk->x0 - band->x0;
OPJ_INT32 y = cblk->y0 - band->y0;
if (band->bandno & 1) {
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
x += pres->x1 - pres->x0;
}
if (band->bandno & 2) {
opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
y += pres->y1 - pres->y0;
}
if (!opj_t1_allocate_buffers(
t1,
(OPJ_UINT32)(cblk->x1 - cblk->x0),
(OPJ_UINT32)(cblk->y1 - cblk->y0))) {
return OPJ_FALSE;
}
cblk_w = t1->w;
cblk_h = t1->h;
tileLineAdvance = tile_w - cblk_w;
tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x];
t1->data = tiledp;
t1->data_stride = tile_w;
if (tccp->qmfbid == 1) {
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
tiledp[tileIndex] *= (1 << T1_NMSEDEC_FRACBITS);
tileIndex++;
}
tileIndex += tileLineAdvance;
}
} else { /* if (tccp->qmfbid == 0) */
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
OPJ_INT32 tmp = tiledp[tileIndex];
tiledp[tileIndex] =
opj_int_fix_mul_t1(
tmp,
bandconst);
tileIndex++;
}
tileIndex += tileLineAdvance;
}
}
opj_t1_encode_cblk(
t1,
cblk,
band->bandno,
compno,
tilec->numresolutions - 1 - resno,
tccp->qmfbid,
band->stepsize,
tccp->cblksty,
tile->numcomps,
tile,
mct_norms,
mct_numcomps);
} /* cblkno */
} /* precno */
} /* bandno */
} /* resno */
} /* compno */
return OPJ_TRUE;
}
/* Returns whether the pass (bpno, passtype) is terminated */
static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
OPJ_UINT32 cblksty,
OPJ_INT32 bpno,
OPJ_UINT32 passtype)
{
/* Is it the last cleanup pass ? */
if (passtype == 2 && bpno == 0) {
return OPJ_TRUE;
}
if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
return OPJ_TRUE;
}
if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
/* For bypass arithmetic bypass, terminate the 4th cleanup pass */
if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
return OPJ_TRUE;
}
/* and beyond terminate all the magnitude refinement passes (in raw) */
/* and cleanup passes (in MQC) */
if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
return OPJ_TRUE;
}
}
return OPJ_FALSE;
}
/** mod fixed_quality */
static void opj_t1_encode_cblk(opj_t1_t *t1,
opj_tcd_cblk_enc_t* cblk,
OPJ_UINT32 orient,
OPJ_UINT32 compno,
OPJ_UINT32 level,
OPJ_UINT32 qmfbid,
OPJ_FLOAT64 stepsize,
OPJ_UINT32 cblksty,
OPJ_UINT32 numcomps,
opj_tcd_tile_t * tile,
const OPJ_FLOAT64 * mct_norms,
OPJ_UINT32 mct_numcomps)
{
OPJ_FLOAT64 cumwmsedec = 0.0;
opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
OPJ_UINT32 passno;
OPJ_INT32 bpno;
OPJ_UINT32 passtype;
OPJ_INT32 nmsedec = 0;
OPJ_INT32 max;
OPJ_UINT32 i, j;
OPJ_BYTE type = T1_TYPE_MQ;
OPJ_FLOAT64 tempwmsedec;
#ifdef EXTRA_DEBUG
printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
#endif
mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
max = 0;
for (i = 0; i < t1->w; ++i) {
for (j = 0; j < t1->h; ++j) {
OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
max = opj_int_max(max, tmp);
}
}
cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
T1_NMSEDEC_FRACBITS) : 0;
if (cblk->numbps == 0) {
cblk->totalpasses = 0;
return;
}
bpno = (OPJ_INT32)(cblk->numbps - 1);
passtype = 2;
opj_mqc_resetstates(mqc);
opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
opj_mqc_init_enc(mqc, cblk->data);
for (passno = 0; bpno >= 0; ++passno) {
opj_tcd_pass_t *pass = &cblk->passes[passno];
type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
(cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
/* If the previous pass was terminating, we need to reset the encoder */
if (passno > 0 && cblk->passes[passno - 1].term) {
if (type == T1_TYPE_RAW) {
opj_mqc_bypass_init_enc(mqc);
} else {
opj_mqc_restart_init_enc(mqc);
}
}
switch (passtype) {
case 0:
opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
break;
case 1:
opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
break;
case 2:
opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
/* code switch SEGMARK (i.e. SEGSYM) */
if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
opj_mqc_segmark_enc(mqc);
}
break;
}
/* fixed_quality */
tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
stepsize, numcomps, mct_norms, mct_numcomps) ;
cumwmsedec += tempwmsedec;
tile->distotile += tempwmsedec;
pass->distortiondec = cumwmsedec;
if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
/* If it is a terminated pass, terminate it */
if (type == T1_TYPE_RAW) {
opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
} else {
if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
opj_mqc_erterm_enc(mqc);
} else {
opj_mqc_flush(mqc);
}
}
pass->term = 1;
pass->rate = opj_mqc_numbytes(mqc);
} else {
/* Non terminated pass */
OPJ_UINT32 rate_extra_bytes;
if (type == T1_TYPE_RAW) {
rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
} else {
rate_extra_bytes = 3;
}
pass->term = 0;
pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
}
if (++passtype == 3) {
passtype = 0;
bpno--;
}
/* Code-switch "RESET" */
if (cblksty & J2K_CCP_CBLKSTY_RESET) {
opj_mqc_reset_enc(mqc);
}
}
cblk->totalpasses = passno;
if (cblk->totalpasses) {
/* Make sure that pass rates are increasing */
OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
for (passno = cblk->totalpasses; passno > 0;) {
opj_tcd_pass_t *pass = &cblk->passes[--passno];
if (pass->rate > last_pass_rate) {
pass->rate = last_pass_rate;
} else {
last_pass_rate = pass->rate;
}
}
}
for (passno = 0; passno < cblk->totalpasses; passno++) {
opj_tcd_pass_t *pass = &cblk->passes[passno];
/* Prevent generation of FF as last data byte of a pass*/
/* For terminating passes, the flushing procedure ensured this already */
assert(pass->rate > 0);
if (cblk->data[pass->rate - 1] == 0xFF) {
pass->rate--;
}
pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
}
#ifdef EXTRA_DEBUG
printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
/* Check that there not 0xff >=0x90 sequences */
if (cblk->totalpasses) {
OPJ_UINT32 i;
OPJ_UINT32 len = opj_mqc_numbytes(mqc);
for (i = 1; i < len; ++i) {
if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
abort();
}
}
}
#endif
}