Slight improvement in management of code block chunks

Instead of keeping the chunk array at the segment level, we can move it to the
code block itself, since segments are filled in sequential order.
This limits the number of memory allocations and slightly decreases memory usage.

Massif heap profile on MAPA_005.jp2:

n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
 n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61)
  n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676)
   n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816)
    n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617)
     n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348)
      n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846)
       n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564)
        n0: 1610689344 0x40374E: main (opj_decompress.c:1459)
 n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683)
  n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348)
   n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846)
    n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564)
     n0: 219232541 0x40374E: main (opj_decompress.c:1459)
 n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225)
  n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617)
   n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348)
    n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846)
     n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564)
      n0: 23893200 0x40374E: main (opj_decompress.c:1459)
 n0: 17497464 in 52 places, all below massif's threshold (1.00%)
This commit is contained in:
Even Rouault 2017-07-06 19:34:21 +02:00
parent ca34d13e76
commit 92114694a4
5 changed files with 99 additions and 92 deletions

View File

@ -1604,7 +1604,7 @@ void opj_t1_destroy(opj_t1_t *p_t1)
p_t1->flags = 00;
}
opj_free(p_t1->segdatabuffer);
opj_free(p_t1->cblkdatabuffer);
opj_free(p_t1);
}
@ -1615,7 +1615,7 @@ typedef struct {
opj_tcd_band_t* band;
opj_tcd_tilecomp_t* tilec;
opj_tccp_t* tccp;
OPJ_BOOL mustuse_segdatabuffer;
OPJ_BOOL mustuse_cblkdatabuffer;
volatile OPJ_BOOL* pret;
opj_event_mgr_t *p_manager;
opj_mutex_t* p_manager_mutex;
@ -1660,7 +1660,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
t1 = opj_t1_create(OPJ_FALSE);
opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
}
t1->mustuse_segdatabuffer = job->mustuse_segdatabuffer;
t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
if (OPJ_FALSE == opj_t1_decode_cblk(
t1,
@ -1790,7 +1790,7 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp,
job->p_manager_mutex = p_manager_mutex;
job->p_manager = p_manager;
job->check_pterm = check_pterm;
job->mustuse_segdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
if (!(*pret)) {
return;
@ -1818,6 +1818,8 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
OPJ_INT32 bpno_plus_one;
OPJ_UINT32 passtype;
OPJ_UINT32 segno, passno;
OPJ_BYTE* cblkdata = NULL;
OPJ_UINT32 cblkdataindex = 0;
OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
@ -1849,58 +1851,57 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
/* Even if we have a single chunk, in multi-threaded decoding */
/* the insertion of our synthetic marker might potentially override */
/* valid codestream of other codeblocks decoded in parallel. */
if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
OPJ_UINT32 i;
OPJ_UINT32 cblk_len;
/* Compute whole codeblock length from chunk lengths */
cblk_len = 0;
for (i = 0; i < cblk->numchunks; i++) {
cblk_len += cblk->chunks[i].len;
}
/* Allocate temporary memory if needed */
if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
if (cblkdata == NULL) {
return OPJ_FALSE;
}
t1->cblkdatabuffer = cblkdata;
memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
}
/* Concatenate all chunks */
cblkdata = t1->cblkdatabuffer;
cblk_len = 0;
for (i = 0; i < cblk->numchunks; i++) {
memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
cblk_len += cblk->chunks[i].len;
}
} else if (cblk->numchunks == 1) {
cblkdata = cblk->chunks[0].data;
}
for (segno = 0; segno < cblk->real_num_segs; ++segno) {
opj_tcd_seg_t *seg = &cblk->segs[segno];
OPJ_BYTE* segdata;
OPJ_UINT32 seglen;
/* BYPASS mode */
type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
(cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
/* Even if we have a single chunk, in mulithtreaded decoding */
/* the insertion of our synthetic marker might potentially override */
/* valid codestream of other codeblocks decoded in parallel. */
if (seg->numchunks == 1 && !(t1->mustuse_segdatabuffer)) {
segdata = seg->chunks[0].data;
seglen = seg->chunks[0].len;
} else {
OPJ_UINT32 i;
/* Compute whole segment length from chunk lengths */
seglen = 0;
for (i = 0; i < seg->numchunks; i++) {
seglen += seg->chunks[i].len;
}
/* Allocate temporary memory if needed */
if (seglen + OPJ_COMMON_CBLK_DATA_EXTRA > t1->segdatabuffersize) {
segdata = (OPJ_BYTE*)opj_realloc(t1->segdatabuffer,
seglen + OPJ_COMMON_CBLK_DATA_EXTRA);
if (segdata == NULL) {
return OPJ_FALSE;
}
t1->segdatabuffer = segdata;
memset(t1->segdatabuffer + seglen, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
t1->segdatabuffersize = seglen + OPJ_COMMON_CBLK_DATA_EXTRA;
}
/* Concatenate all segments chunks */
segdata = t1->segdatabuffer;
seglen = 0;
for (i = 0; i < seg->numchunks; i++) {
memcpy(segdata + seglen, seg->chunks[i].data, seg->chunks[i].len);
seglen += seg->chunks[i].len;
}
}
if (type == T1_TYPE_RAW) {
opj_mqc_raw_init_dec(mqc, segdata, seglen,
opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
OPJ_COMMON_CBLK_DATA_EXTRA);
} else {
opj_mqc_init_dec(mqc, segdata, seglen,
opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
OPJ_COMMON_CBLK_DATA_EXTRA);
}
cblkdataindex += seg->len;
for (passno = 0; (passno < seg->real_num_passes) &&
(bpno_plus_one >= 1); ++passno) {

View File

@ -202,10 +202,12 @@ typedef struct opj_t1 {
OPJ_BOOL encoder;
/* The 3 variables below are only used by the decoder */
OPJ_BOOL mustuse_segdatabuffer; /* set to TRUE in multithreaded context */
OPJ_BYTE
*segdatabuffer; /* Temporary buffer to concatenate all chunks of a segment */
OPJ_UINT32 segdatabuffersize; /* Maximum size available in segdatabuffer */
/* set to TRUE in multithreaded context */
OPJ_BOOL mustuse_cblkdatabuffer;
/* Temporary buffer to concatenate all chunks of a codeblock */
OPJ_BYTE *cblkdatabuffer;
/* Maximum size available in cblkdatabuffer */
OPJ_UINT32 cblkdatabuffersize;
} opj_t1_t;
/** @name Exported functions */

View File

@ -1287,25 +1287,26 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2,
#endif /* USE_JPWL */
if (l_seg->numchunks == l_seg->numchunksalloc) {
OPJ_UINT32 l_numchunksalloc = l_seg->numchunksalloc * 2 + 1;
if (l_cblk->numchunks == l_cblk->numchunksalloc) {
OPJ_UINT32 l_numchunksalloc = l_cblk->numchunksalloc * 2 + 1;
opj_tcd_seg_data_chunk_t* l_chunks =
(opj_tcd_seg_data_chunk_t*)opj_realloc(l_seg->chunks,
(opj_tcd_seg_data_chunk_t*)opj_realloc(l_cblk->chunks,
l_numchunksalloc * sizeof(opj_tcd_seg_data_chunk_t));
if (l_chunks == NULL) {
opj_event_msg(p_manager, EVT_ERROR,
"cannot allocate opj_tcd_seg_data_chunk_t* array");
return OPJ_FALSE;
}
l_seg->chunks = l_chunks;
l_seg->numchunksalloc = l_numchunksalloc;
l_cblk->chunks = l_chunks;
l_cblk->numchunksalloc = l_numchunksalloc;
}
l_seg->chunks[l_seg->numchunks].data = l_current_data;
l_seg->chunks[l_seg->numchunks].len = l_seg->newlen;
l_seg->numchunks ++;
l_cblk->chunks[l_cblk->numchunks].data = l_current_data;
l_cblk->chunks[l_cblk->numchunks].len = l_seg->newlen;
l_cblk->numchunks ++;
l_current_data += l_seg->newlen;
l_seg->len += l_seg->newlen;
l_seg->numpasses += l_seg->numnewpasses;
l_cblk->numnewpasses -= l_seg->numnewpasses;

View File

@ -1213,11 +1213,7 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
void opj_tcd_reinit_segment(opj_tcd_seg_t* seg)
{
opj_tcd_seg_data_chunk_t *l_chunks = seg->chunks;
OPJ_UINT32 l_numchunksalloc = seg->numchunksalloc;
memset(seg, 0, sizeof(opj_tcd_seg_t));
seg->chunks = l_chunks;
seg->numchunksalloc = l_numchunksalloc;
}
/**
@ -1241,6 +1237,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
/* sanitize */
opj_tcd_seg_t * l_segs = p_code_block->segs;
OPJ_UINT32 l_current_max_segs = p_code_block->m_current_max_segs;
opj_tcd_seg_data_chunk_t* l_chunks = p_code_block->chunks;
OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc;
OPJ_UINT32 i;
memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t));
@ -1249,6 +1247,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
for (i = 0; i < l_current_max_segs; ++i) {
opj_tcd_reinit_segment(&l_segs[i]);
}
p_code_block->chunks = l_chunks;
p_code_block->numchunksalloc = l_numchunksalloc;
}
return OPJ_TRUE;
@ -1952,14 +1952,15 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct)
for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) {
if (l_code_block->segs) {
OPJ_UINT32 i;
for (i = 0; i < l_code_block->m_current_max_segs; ++ i) {
opj_free(l_code_block->segs[i].chunks);
}
opj_free(l_code_block->segs);
l_code_block->segs = 00;
}
if (l_code_block->chunks) {
opj_free(l_code_block->chunks);
l_code_block->chunks = 00;
}
++l_code_block;
}

View File

@ -89,12 +89,12 @@ typedef struct opj_tcd_cblk_enc {
} opj_tcd_cblk_enc_t;
/** Chunk of codestream data that is part of a T1 segment */
/** Chunk of codestream data that is part of a code block */
typedef struct opj_tcd_seg_data_chunk {
OPJ_BYTE *
data; /* Point to tilepart buffer. We don't make a copy !
So the tilepart buffer must be kept alive
as long as we need to decode the codeblocks */
/* Point to tilepart buffer. We don't make a copy !
So the tilepart buffer must be kept alive
as long as we need to decode the codeblocks */
OPJ_BYTE * data;
OPJ_UINT32 len; /* Usable length of data */
} opj_tcd_seg_data_chunk_t;
@ -102,35 +102,37 @@ typedef struct opj_tcd_seg_data_chunk {
* A segment represent a number of consecutive coding passes, without termination
* of MQC or RAW between them. */
typedef struct opj_tcd_seg {
opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */
OPJ_UINT32 numchunks; /* Number of valid chunks items */
OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */
OPJ_UINT32
numpasses; /* Number of passes decoded. Including those that we skip */
OPJ_UINT32
real_num_passes; /* Number of passes actually to be decoded. To be used for code-block decoding */
OPJ_UINT32 maxpasses; /* Maximum number of passes for this segment */
OPJ_UINT32
numnewpasses; /* Number of new passes for current packed. Transitory value */
OPJ_UINT32
newlen; /* Codestream length for this segment for current packed. Transitory value */
OPJ_UINT32 len; /* Size of data related to this segment */
/* Number of passes decoded. Including those that we skip */
OPJ_UINT32 numpasses;
/* Number of passes actually to be decoded. To be used for code-block decoding */
OPJ_UINT32 real_num_passes;
/* Maximum number of passes for this segment */
OPJ_UINT32 maxpasses;
/* Number of new passes for current packet. Transitory value */
OPJ_UINT32 numnewpasses;
/* Codestream length for this segment for current packet. Transitory value */
OPJ_UINT32 newlen;
} opj_tcd_seg_t;
/* Code-block for decoding */
typedef struct opj_tcd_cblk_dec {
opj_tcd_seg_t* segs; /* segments information */
OPJ_INT32 x0, y0, x1,
y1; /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */
/* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
OPJ_INT32 x0, y0, x1, y1;
OPJ_UINT32 numbps;
OPJ_UINT32
numlenbits; /* number of bits for len,, for the current packet. Transitory value */
OPJ_UINT32
numnewpasses; /* number of pass added to the code-blocks, for the current packet. Transitory value */
OPJ_UINT32
numsegs; /* number of segments, including those of packet we skip */
OPJ_UINT32
real_num_segs; /* number of segments, to be used for code block decoding */
/* number of bits for len, for the current packet. Transitory value */
OPJ_UINT32 numlenbits;
/* number of pass added to the code-blocks, for the current packet. Transitory value */
OPJ_UINT32 numnewpasses;
/* number of segments, including those of packet we skip */
OPJ_UINT32 numsegs;
/* number of segments, to be used for code block decoding */
OPJ_UINT32 real_num_segs;
OPJ_UINT32 m_current_max_segs; /* allocated number of segs[] items */
OPJ_UINT32 numchunks; /* Number of valid chunks items */
OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */
} opj_tcd_cblk_dec_t;
/**
@ -397,7 +399,7 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec);
*/
OPJ_BOOL opj_tcd_is_band_empty(opj_tcd_band_t* band);
/** Reinitialize a segment, without deallocating its chunks array */
/** Reinitialize a segment */
void opj_tcd_reinit_segment(opj_tcd_seg_t* seg);
/* ----------------------------------------------------------------------- */