Slight improvement in management of code block chunks
Instead of having the chunk array at the segment level, we can move it down to the codeblock itself, since segments are filled in sequential order. This limits the number of memory allocations and slightly decreases memory usage.

On MAPA_005.jp2:

n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
 n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61)
  n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676)
   n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816)
    n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617)
     n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348)
      n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846)
       n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564)
        n0: 1610689344 0x40374E: main (opj_decompress.c:1459)
 n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683)
  n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348)
   n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846)
    n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564)
     n0: 219232541 0x40374E: main (opj_decompress.c:1459)
 n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225)
  n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617)
   n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348)
    n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846)
     n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564)
      n0: 23893200 0x40374E: main (opj_decompress.c:1459)
 n0: 17497464 in 52 places, all below massif's threshold (1.00%)
This commit is contained in:
parent
ca34d13e76
commit
92114694a4
|
@ -1604,7 +1604,7 @@ void opj_t1_destroy(opj_t1_t *p_t1)
|
|||
p_t1->flags = 00;
|
||||
}
|
||||
|
||||
opj_free(p_t1->segdatabuffer);
|
||||
opj_free(p_t1->cblkdatabuffer);
|
||||
|
||||
opj_free(p_t1);
|
||||
}
|
||||
|
@ -1615,7 +1615,7 @@ typedef struct {
|
|||
opj_tcd_band_t* band;
|
||||
opj_tcd_tilecomp_t* tilec;
|
||||
opj_tccp_t* tccp;
|
||||
OPJ_BOOL mustuse_segdatabuffer;
|
||||
OPJ_BOOL mustuse_cblkdatabuffer;
|
||||
volatile OPJ_BOOL* pret;
|
||||
opj_event_mgr_t *p_manager;
|
||||
opj_mutex_t* p_manager_mutex;
|
||||
|
@ -1660,7 +1660,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
|
|||
t1 = opj_t1_create(OPJ_FALSE);
|
||||
opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
|
||||
}
|
||||
t1->mustuse_segdatabuffer = job->mustuse_segdatabuffer;
|
||||
t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
|
||||
|
||||
if (OPJ_FALSE == opj_t1_decode_cblk(
|
||||
t1,
|
||||
|
@ -1790,7 +1790,7 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp,
|
|||
job->p_manager_mutex = p_manager_mutex;
|
||||
job->p_manager = p_manager;
|
||||
job->check_pterm = check_pterm;
|
||||
job->mustuse_segdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
|
||||
job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
|
||||
opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
|
||||
if (!(*pret)) {
|
||||
return;
|
||||
|
@ -1818,6 +1818,8 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
|
|||
OPJ_INT32 bpno_plus_one;
|
||||
OPJ_UINT32 passtype;
|
||||
OPJ_UINT32 segno, passno;
|
||||
OPJ_BYTE* cblkdata = NULL;
|
||||
OPJ_UINT32 cblkdataindex = 0;
|
||||
OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
|
||||
|
||||
mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
|
||||
|
@ -1849,58 +1851,57 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
|
|||
opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
|
||||
opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
|
||||
|
||||
/* Even if we have a single chunk, in multi-threaded decoding */
|
||||
/* the insertion of our synthetic marker might potentially override */
|
||||
/* valid codestream of other codeblocks decoded in parallel. */
|
||||
if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
|
||||
OPJ_UINT32 i;
|
||||
OPJ_UINT32 cblk_len;
|
||||
|
||||
/* Compute whole codeblock length from chunk lengths */
|
||||
cblk_len = 0;
|
||||
for (i = 0; i < cblk->numchunks; i++) {
|
||||
cblk_len += cblk->chunks[i].len;
|
||||
}
|
||||
|
||||
/* Allocate temporary memory if needed */
|
||||
if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
|
||||
cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
|
||||
cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
|
||||
if (cblkdata == NULL) {
|
||||
return OPJ_FALSE;
|
||||
}
|
||||
t1->cblkdatabuffer = cblkdata;
|
||||
memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
|
||||
t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
|
||||
}
|
||||
|
||||
/* Concatenate all chunks */
|
||||
cblkdata = t1->cblkdatabuffer;
|
||||
cblk_len = 0;
|
||||
for (i = 0; i < cblk->numchunks; i++) {
|
||||
memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
|
||||
cblk_len += cblk->chunks[i].len;
|
||||
}
|
||||
} else if (cblk->numchunks == 1) {
|
||||
cblkdata = cblk->chunks[0].data;
|
||||
}
|
||||
|
||||
for (segno = 0; segno < cblk->real_num_segs; ++segno) {
|
||||
opj_tcd_seg_t *seg = &cblk->segs[segno];
|
||||
OPJ_BYTE* segdata;
|
||||
OPJ_UINT32 seglen;
|
||||
|
||||
/* BYPASS mode */
|
||||
type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
|
||||
(cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
|
||||
|
||||
/* Even if we have a single chunk, in multithreaded decoding */
|
||||
/* the insertion of our synthetic marker might potentially override */
|
||||
/* valid codestream of other codeblocks decoded in parallel. */
|
||||
if (seg->numchunks == 1 && !(t1->mustuse_segdatabuffer)) {
|
||||
segdata = seg->chunks[0].data;
|
||||
seglen = seg->chunks[0].len;
|
||||
} else {
|
||||
OPJ_UINT32 i;
|
||||
|
||||
/* Compute whole segment length from chunk lengths */
|
||||
seglen = 0;
|
||||
for (i = 0; i < seg->numchunks; i++) {
|
||||
seglen += seg->chunks[i].len;
|
||||
}
|
||||
|
||||
/* Allocate temporary memory if needed */
|
||||
if (seglen + OPJ_COMMON_CBLK_DATA_EXTRA > t1->segdatabuffersize) {
|
||||
segdata = (OPJ_BYTE*)opj_realloc(t1->segdatabuffer,
|
||||
seglen + OPJ_COMMON_CBLK_DATA_EXTRA);
|
||||
if (segdata == NULL) {
|
||||
return OPJ_FALSE;
|
||||
}
|
||||
t1->segdatabuffer = segdata;
|
||||
memset(t1->segdatabuffer + seglen, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
|
||||
t1->segdatabuffersize = seglen + OPJ_COMMON_CBLK_DATA_EXTRA;
|
||||
}
|
||||
|
||||
/* Concatenate all segments chunks */
|
||||
segdata = t1->segdatabuffer;
|
||||
seglen = 0;
|
||||
for (i = 0; i < seg->numchunks; i++) {
|
||||
memcpy(segdata + seglen, seg->chunks[i].data, seg->chunks[i].len);
|
||||
seglen += seg->chunks[i].len;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == T1_TYPE_RAW) {
|
||||
opj_mqc_raw_init_dec(mqc, segdata, seglen,
|
||||
opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
|
||||
OPJ_COMMON_CBLK_DATA_EXTRA);
|
||||
} else {
|
||||
opj_mqc_init_dec(mqc, segdata, seglen,
|
||||
opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
|
||||
OPJ_COMMON_CBLK_DATA_EXTRA);
|
||||
}
|
||||
cblkdataindex += seg->len;
|
||||
|
||||
for (passno = 0; (passno < seg->real_num_passes) &&
|
||||
(bpno_plus_one >= 1); ++passno) {
|
||||
|
|
|
@ -202,10 +202,12 @@ typedef struct opj_t1 {
|
|||
OPJ_BOOL encoder;
|
||||
|
||||
/* The 3 variables below are only used by the decoder */
|
||||
OPJ_BOOL mustuse_segdatabuffer; /* set to TRUE in multithreaded context */
|
||||
OPJ_BYTE
|
||||
*segdatabuffer; /* Temporary buffer to concatenate all chunks of a segment */
|
||||
OPJ_UINT32 segdatabuffersize; /* Maximum size available in segdatabuffer */
|
||||
/* set to TRUE in multithreaded context */
|
||||
OPJ_BOOL mustuse_cblkdatabuffer;
|
||||
/* Temporary buffer to concatenate all chunks of a code block */
|
||||
OPJ_BYTE *cblkdatabuffer;
|
||||
/* Maximum size available in cblkdatabuffer */
|
||||
OPJ_UINT32 cblkdatabuffersize;
|
||||
} opj_t1_t;
|
||||
|
||||
/** @name Exported functions */
|
||||
|
|
|
@ -1287,25 +1287,26 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2,
|
|||
|
||||
#endif /* USE_JPWL */
|
||||
|
||||
if (l_seg->numchunks == l_seg->numchunksalloc) {
|
||||
OPJ_UINT32 l_numchunksalloc = l_seg->numchunksalloc * 2 + 1;
|
||||
if (l_cblk->numchunks == l_cblk->numchunksalloc) {
|
||||
OPJ_UINT32 l_numchunksalloc = l_cblk->numchunksalloc * 2 + 1;
|
||||
opj_tcd_seg_data_chunk_t* l_chunks =
|
||||
(opj_tcd_seg_data_chunk_t*)opj_realloc(l_seg->chunks,
|
||||
(opj_tcd_seg_data_chunk_t*)opj_realloc(l_cblk->chunks,
|
||||
l_numchunksalloc * sizeof(opj_tcd_seg_data_chunk_t));
|
||||
if (l_chunks == NULL) {
|
||||
opj_event_msg(p_manager, EVT_ERROR,
|
||||
"cannot allocate opj_tcd_seg_data_chunk_t* array");
|
||||
return OPJ_FALSE;
|
||||
}
|
||||
l_seg->chunks = l_chunks;
|
||||
l_seg->numchunksalloc = l_numchunksalloc;
|
||||
l_cblk->chunks = l_chunks;
|
||||
l_cblk->numchunksalloc = l_numchunksalloc;
|
||||
}
|
||||
|
||||
l_seg->chunks[l_seg->numchunks].data = l_current_data;
|
||||
l_seg->chunks[l_seg->numchunks].len = l_seg->newlen;
|
||||
l_seg->numchunks ++;
|
||||
l_cblk->chunks[l_cblk->numchunks].data = l_current_data;
|
||||
l_cblk->chunks[l_cblk->numchunks].len = l_seg->newlen;
|
||||
l_cblk->numchunks ++;
|
||||
|
||||
l_current_data += l_seg->newlen;
|
||||
l_seg->len += l_seg->newlen;
|
||||
l_seg->numpasses += l_seg->numnewpasses;
|
||||
l_cblk->numnewpasses -= l_seg->numnewpasses;
|
||||
|
||||
|
|
|
@ -1213,11 +1213,7 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
|
|||
|
||||
void opj_tcd_reinit_segment(opj_tcd_seg_t* seg)
|
||||
{
|
||||
opj_tcd_seg_data_chunk_t *l_chunks = seg->chunks;
|
||||
OPJ_UINT32 l_numchunksalloc = seg->numchunksalloc;
|
||||
memset(seg, 0, sizeof(opj_tcd_seg_t));
|
||||
seg->chunks = l_chunks;
|
||||
seg->numchunksalloc = l_numchunksalloc;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1241,6 +1237,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
|
|||
/* sanitize */
|
||||
opj_tcd_seg_t * l_segs = p_code_block->segs;
|
||||
OPJ_UINT32 l_current_max_segs = p_code_block->m_current_max_segs;
|
||||
opj_tcd_seg_data_chunk_t* l_chunks = p_code_block->chunks;
|
||||
OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc;
|
||||
OPJ_UINT32 i;
|
||||
|
||||
memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t));
|
||||
|
@ -1249,6 +1247,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
|
|||
for (i = 0; i < l_current_max_segs; ++i) {
|
||||
opj_tcd_reinit_segment(&l_segs[i]);
|
||||
}
|
||||
p_code_block->chunks = l_chunks;
|
||||
p_code_block->numchunksalloc = l_numchunksalloc;
|
||||
}
|
||||
|
||||
return OPJ_TRUE;
|
||||
|
@ -1952,14 +1952,15 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct)
|
|||
for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) {
|
||||
|
||||
if (l_code_block->segs) {
|
||||
OPJ_UINT32 i;
|
||||
for (i = 0; i < l_code_block->m_current_max_segs; ++ i) {
|
||||
opj_free(l_code_block->segs[i].chunks);
|
||||
}
|
||||
opj_free(l_code_block->segs);
|
||||
l_code_block->segs = 00;
|
||||
}
|
||||
|
||||
if (l_code_block->chunks) {
|
||||
opj_free(l_code_block->chunks);
|
||||
l_code_block->chunks = 00;
|
||||
}
|
||||
|
||||
++l_code_block;
|
||||
}
|
||||
|
||||
|
|
|
@ -89,12 +89,12 @@ typedef struct opj_tcd_cblk_enc {
|
|||
} opj_tcd_cblk_enc_t;
|
||||
|
||||
|
||||
/** Chunk of codestream data that is part of a T1 segment */
|
||||
/** Chunk of codestream data that is part of a code block */
|
||||
typedef struct opj_tcd_seg_data_chunk {
|
||||
OPJ_BYTE *
|
||||
data; /* Point to tilepart buffer. We don't make a copy !
|
||||
/* Point to tilepart buffer. We don't make a copy !
|
||||
So the tilepart buffer must be kept alive
|
||||
as long as we need to decode the codeblocks */
|
||||
OPJ_BYTE * data;
|
||||
OPJ_UINT32 len; /* Usable length of data */
|
||||
} opj_tcd_seg_data_chunk_t;
|
||||
|
||||
|
@ -102,35 +102,37 @@ typedef struct opj_tcd_seg_data_chunk {
|
|||
* A segment represents a number of consecutive coding passes, without termination
|
||||
* of MQC or RAW between them. */
|
||||
typedef struct opj_tcd_seg {
|
||||
opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */
|
||||
OPJ_UINT32 numchunks; /* Number of valid chunks items */
|
||||
OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */
|
||||
OPJ_UINT32
|
||||
numpasses; /* Number of passes decoded. Including those that we skip */
|
||||
OPJ_UINT32
|
||||
real_num_passes; /* Number of passes actually to be decoded. To be used for code-block decoding */
|
||||
OPJ_UINT32 maxpasses; /* Maximum number of passes for this segment */
|
||||
OPJ_UINT32
|
||||
numnewpasses; /* Number of new passes for current packet. Transitory value */
|
||||
OPJ_UINT32
|
||||
newlen; /* Codestream length for this segment for current packet. Transitory value */
|
||||
OPJ_UINT32 len; /* Size of data related to this segment */
|
||||
/* Number of passes decoded. Including those that we skip */
|
||||
OPJ_UINT32 numpasses;
|
||||
/* Number of passes actually to be decoded. To be used for code-block decoding */
|
||||
OPJ_UINT32 real_num_passes;
|
||||
/* Maximum number of passes for this segment */
|
||||
OPJ_UINT32 maxpasses;
|
||||
/* Number of new passes for current packet. Transitory value */
|
||||
OPJ_UINT32 numnewpasses;
|
||||
/* Codestream length for this segment for current packet. Transitory value */
|
||||
OPJ_UINT32 newlen;
|
||||
} opj_tcd_seg_t;
|
||||
|
||||
/* Code-block for decoding */
|
||||
typedef struct opj_tcd_cblk_dec {
|
||||
opj_tcd_seg_t* segs; /* segments information */
|
||||
OPJ_INT32 x0, y0, x1,
|
||||
y1; /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
|
||||
opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */
|
||||
/* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
|
||||
OPJ_INT32 x0, y0, x1, y1;
|
||||
OPJ_UINT32 numbps;
|
||||
OPJ_UINT32
|
||||
numlenbits; /* number of bits for len, for the current packet. Transitory value */
|
||||
OPJ_UINT32
|
||||
numnewpasses; /* number of passes added to the code-block, for the current packet. Transitory value */
|
||||
OPJ_UINT32
|
||||
numsegs; /* number of segments, including those of packet we skip */
|
||||
OPJ_UINT32
|
||||
real_num_segs; /* number of segments, to be used for code block decoding */
|
||||
/* number of bits for len, for the current packet. Transitory value */
|
||||
OPJ_UINT32 numlenbits;
|
||||
/* number of passes added to the code-block, for the current packet. Transitory value */
|
||||
OPJ_UINT32 numnewpasses;
|
||||
/* number of segments, including those of packet we skip */
|
||||
OPJ_UINT32 numsegs;
|
||||
/* number of segments, to be used for code block decoding */
|
||||
OPJ_UINT32 real_num_segs;
|
||||
OPJ_UINT32 m_current_max_segs; /* allocated number of segs[] items */
|
||||
OPJ_UINT32 numchunks; /* Number of valid chunk items */
|
||||
OPJ_UINT32 numchunksalloc; /* Number of chunk items allocated */
|
||||
} opj_tcd_cblk_dec_t;
|
||||
|
||||
/**
|
||||
|
@ -397,7 +399,7 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec);
|
|||
*/
|
||||
OPJ_BOOL opj_tcd_is_band_empty(opj_tcd_band_t* band);
|
||||
|
||||
/** Reinitialize a segment, without deallocating its chunks array */
|
||||
/** Reinitialize a segment */
|
||||
void opj_tcd_reinit_segment(opj_tcd_seg_t* seg);
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
|
Loading…
Reference in New Issue