From 793edc38e41700e9b4cda4b7f9c79aba95b8d989 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 5 Jul 2017 23:48:28 +0200 Subject: [PATCH 1/8] Decrease memory consumption for whole image single tile decoding. We can use the same buffer for the tile decoding and the final image, and save the intermediate buffer to transfer between those. Effect on the decoding of MAPA (9944 x 13498 x 3 components of size byte) Peak memory from 4.5 GB to 2.7 GB Now: n5: 2699708767 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E77E07: opj_aligned_malloc (opj_malloc.c:61) <-- final image n1: 1610689344 0x4E7195B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E722D2: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 1610689344 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 1610689344 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 1610689344 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40369E: main (opj_decompress.c:1459) n1: 815554560 0x4E72231: opj_tcd_init_decode_tile (tcd.c:1217) <-- working memory for code blocks: 9944*13498/64/64*8192*3 n1: 815554560 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 815554560 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 815554560 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 815554560 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 815554560 0x40369E: main (opj_decompress.c:1459) n1: 219758391 0x4E4C0BF: opj_j2k_read_tile_header (j2k.c:4661) <-- ingestion of code stream n1: 219758391 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 219758391 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 219758391 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 219758391 0x40369E: main (opj_decompress.c:1459) n1: 39822000 0x4E7224F: opj_tcd_init_decode_tile (tcd.c:1224) <-- OPJ_J2K_DEFAULT_NB_SEGS*sizeof(opj_tcd_seg_t) per codeblock n1: 39822000 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 39822000 0x4E4C742: opj_j2k_decode_tiles (j2k.c:10324) n1: 
39822000 0x4E4E20E: opj_j2k_decode (j2k.c:7826) n1: 39822000 0x4E52E42: opj_jp2_decode (jp2.c:1564) n0: 39822000 0x40369E: main (opj_decompress.c:1459) n0: 13884472 in 49 places, all below massif's threshold (1.00%) Before: n5: 4493329848 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n2: 1610709160 0x4E77C87: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E717DB: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E72152: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 1610689344 0x4E4C64A: opj_j2k_decode_tiles (j2k.c:10318) n1: 1610689344 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 1610689344 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40369E: main (opj_decompress.c:1459) n0: 19816 in 2 places, all below massif's threshold (1.00%) n1: 1610689344 0x4E43F36: opj_j2k_update_image_data.isra.7 (j2k.c:8743) n1: 1610689344 0x4E4C5C1: opj_j2k_decode_tiles (j2k.c:10358) n1: 1610689344 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 1610689344 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40369E: main (opj_decompress.c:1459) n1: 815554560 0x4E720B1: opj_tcd_init_decode_tile (tcd.c:1217) n1: 815554560 0x4E4BCF1: opj_j2k_read_tile_header (j2k.c:8597) n1: 815554560 0x4E4C64A: opj_j2k_decode_tiles (j2k.c:10318) n1: 815554560 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 815554560 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 815554560 0x40369E: main (opj_decompress.c:1459) n1: 402672336 0x4E4C545: opj_j2k_decode_tiles (j2k.c:10336) n1: 402672336 0x4E4E08E: opj_j2k_decode (j2k.c:7826) n1: 402672336 0x4E52CC2: opj_jp2_decode (jp2.c:1564) n0: 402672336 0x40369E: main (opj_decompress.c:1459) n0: 53704448 in 58 places, all below massif's threshold (1.00%) --- src/lib/openjp2/j2k.c | 62 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 76efb018..b665924d 100644 --- 
a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -8716,15 +8716,20 @@ OPJ_BOOL opj_j2k_decode_tile(opj_j2k_t * p_j2k, return OPJ_FALSE; } - if (! opj_tcd_update_tile_data(p_j2k->m_tcd, p_data, p_data_size)) { - return OPJ_FALSE; - } + /* p_data can be set to NULL when the call will take care of using */ + /* itself the TCD data. This is typically the case for whole single */ + /* tile decoding optimization. */ + if (p_data != NULL) { + if (! opj_tcd_update_tile_data(p_j2k->m_tcd, p_data, p_data_size)) { + return OPJ_FALSE; + } - /* To avoid to destroy the tcp which can be useful when we try to decode a tile decoded before (cf j2k_random_tile_access) - * we destroy just the data which will be re-read in read_tile_header*/ - /*opj_j2k_tcp_destroy(l_tcp); - p_j2k->m_tcd->tcp = 0;*/ - opj_j2k_tcp_data_destroy(l_tcp); + /* To avoid to destroy the tcp which can be useful when we try to decode a tile decoded before (cf j2k_random_tile_access) + * we destroy just the data which will be re-read in read_tile_header*/ + /*opj_j2k_tcp_destroy(l_tcp); + p_j2k->m_tcd->tcp = 0;*/ + opj_j2k_tcp_data_destroy(l_tcp); + } p_j2k->m_specific_param.m_decoder.m_can_decode = 0; p_j2k->m_specific_param.m_decoder.m_state &= (~(OPJ_UINT32)J2K_STATE_DATA); @@ -10381,6 +10386,47 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, OPJ_BYTE * l_current_data; OPJ_UINT32 nr_tiles = 0; + /* Particular case for whole single tile decoding */ + /* We can avoid allocating intermediate tile buffers */ + if (p_j2k->m_cp.tw == 1 && p_j2k->m_cp.th == 1 && + p_j2k->m_cp.tx0 == 0 && p_j2k->m_cp.ty0 == 0 && + p_j2k->m_output_image->x0 == 0 && + p_j2k->m_output_image->y0 == 0 && + p_j2k->m_output_image->x1 == p_j2k->m_cp.tdx && + p_j2k->m_output_image->y1 == p_j2k->m_cp.tdy && + p_j2k->m_output_image->comps[0].factor == 0) { + OPJ_UINT32 i; + if (! 
opj_j2k_read_tile_header(p_j2k, + &l_current_tile_no, + &l_data_size, + &l_tile_x0, &l_tile_y0, + &l_tile_x1, &l_tile_y1, + &l_nb_comps, + &l_go_on, + p_stream, + p_manager)) { + return OPJ_FALSE; + } + + if (! opj_j2k_decode_tile(p_j2k, l_current_tile_no, NULL, 0, + p_stream, p_manager)) { + opj_event_msg(p_manager, EVT_ERROR, "Failed to decode tile 1/1\n"); + return OPJ_FALSE; + } + + /* Transfer TCD data to output image data */ + for (i = 0; i < p_j2k->m_output_image->numcomps; i++) { + opj_free(p_j2k->m_output_image->comps[i].data); + p_j2k->m_output_image->comps[i].data = + p_j2k->m_tcd->tcd_image->tiles->comps[i].data; + p_j2k->m_output_image->comps[i].resno_decoded = + p_j2k->m_tcd->image->comps[i].resno_decoded; + p_j2k->m_tcd->tcd_image->tiles->comps[i].data = NULL; + } + + return OPJ_TRUE; + } + l_current_data = (OPJ_BYTE*)opj_malloc(1000); if (! l_current_data) { opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to decode tiles\n"); From 61fb5dd7f81c2e3dfabbb99f59dc89572d59fa37 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Jul 2017 01:05:24 +0200 Subject: [PATCH 2/8] Fix crash on Windows due to b7594c0fcb9dd3aa6356d72c4a525d76168da689 b7594c0fcb9dd3aa6356d72c4a525d76168da689 may put opj_tcd_tilecomp_t->data allocated by opj_alloc_tile_component_data() as the image->comps[].data. As opj_alloc_tile_component_data() uses opj_aligned_malloc() we must be sure to use opj_aligned_malloc()/opj_aligned_free() in all places where we alloc/free image->comps[].data.
Note: this might have some compatibility impact in case user code does itself the allocation/free of image->comps[].data --- src/lib/openjp2/image.c | 10 ++++++---- src/lib/openjp2/j2k.c | 11 +++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/lib/openjp2/image.c b/src/lib/openjp2/image.c index d00a2370..ecd65ece 100644 --- a/src/lib/openjp2/image.c +++ b/src/lib/openjp2/image.c @@ -68,19 +68,21 @@ opj_image_t* OPJ_CALLCONV opj_image_create(OPJ_UINT32 numcmpts, comp->prec = cmptparms[compno].prec; comp->bpp = cmptparms[compno].bpp; comp->sgnd = cmptparms[compno].sgnd; - if (comp->h != 0 && (OPJ_SIZE_T)comp->w > SIZE_MAX / comp->h) { + if (comp->h != 0 && + (OPJ_SIZE_T)comp->w > SIZE_MAX / comp->h / sizeof(OPJ_INT32)) { // TODO event manager opj_image_destroy(image); return NULL; } - comp->data = (OPJ_INT32*) opj_calloc((OPJ_SIZE_T)comp->w * comp->h, - sizeof(OPJ_INT32)); + comp->data = (OPJ_INT32*) opj_aligned_malloc( + (size_t)comp->w * comp->h * sizeof(OPJ_INT32)); if (!comp->data) { /* TODO replace with event manager, breaks API */ /* fprintf(stderr,"Unable to allocate memory for image.\n"); */ opj_image_destroy(image); return NULL; } + memset(comp->data, 0, (size_t)comp->w * comp->h * sizeof(OPJ_INT32)); } } @@ -97,7 +99,7 @@ void OPJ_CALLCONV opj_image_destroy(opj_image_t *image) for (compno = 0; compno < image->numcomps; compno++) { opj_image_comp_t *image_comp = &(image->comps[compno]); if (image_comp->data) { - opj_free(image_comp->data); + opj_aligned_free(image_comp->data); } } opj_free(image->comps); diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index b665924d..afbcc9c5 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -8798,15 +8798,18 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, OPJ_SIZE_T l_width = l_img_comp_dest->w; OPJ_SIZE_T l_height = l_img_comp_dest->h; - if ((l_height == 0U) || (l_width > (SIZE_MAX / l_height))) { + if ((l_height == 0U) || (l_width > 
(SIZE_MAX / l_height)) || + l_width * l_height > SIZE_MAX / sizeof(OPJ_INT32)) { /* would overflow */ return OPJ_FALSE; } - l_img_comp_dest->data = (OPJ_INT32*) opj_calloc(l_width * l_height, + l_img_comp_dest->data = (OPJ_INT32*) opj_aligned_malloc(l_width * l_height * sizeof(OPJ_INT32)); if (! l_img_comp_dest->data) { return OPJ_FALSE; } + /* Do we really need this memset ? */ + memset(l_img_comp_dest->data, 0, l_width * l_height * sizeof(OPJ_INT32)); } /* Copy info from decoded comp image to output image */ @@ -10416,7 +10419,7 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, /* Transfer TCD data to output image data */ for (i = 0; i < p_j2k->m_output_image->numcomps; i++) { - opj_free(p_j2k->m_output_image->comps[i].data); + opj_aligned_free(p_j2k->m_output_image->comps[i].data); p_j2k->m_output_image->comps[i].data = p_j2k->m_tcd->tcd_image->tiles->comps[i].data; p_j2k->m_output_image->comps[i].resno_decoded = @@ -10821,7 +10824,7 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k, p_j2k->m_output_image->comps[compno].resno_decoded; if (p_image->comps[compno].data) { - opj_free(p_image->comps[compno].data); + opj_aligned_free(p_image->comps[compno].data); } p_image->comps[compno].data = p_j2k->m_output_image->comps[compno].data; From f58aab9d6a57e48bdc60e15dd373c24de74719a9 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Jul 2017 01:47:40 +0200 Subject: [PATCH 3/8] Add opj_image_data_alloc() / opj_image_data_free() As bin/common/color.c used to directly call malloc()/free(), we need to export functions dedicated to allocating/freeing image component data. 
--- src/bin/common/color.c | 114 ++++++++++++++++++------------------- src/lib/openjp2/image.c | 4 +- src/lib/openjp2/j2k.c | 6 +- src/lib/openjp2/openjpeg.c | 11 ++++ src/lib/openjp2/openjpeg.h | 21 +++++++ src/lib/openjp2/tcd.c | 8 +-- 6 files changed, 98 insertions(+), 66 deletions(-) diff --git a/src/bin/common/color.c b/src/bin/common/color.c index 8bb96043..665558f5 100644 --- a/src/bin/common/color.c +++ b/src/bin/common/color.c @@ -122,9 +122,9 @@ static void sycc444_to_rgb(opj_image_t *img) cb = img->comps[1].data; cr = img->comps[2].data; - d0 = r = (int*)malloc(sizeof(int) * max); - d1 = g = (int*)malloc(sizeof(int) * max); - d2 = b = (int*)malloc(sizeof(int) * max); + d0 = r = (int*)opj_image_data_alloc(sizeof(int) * max); + d1 = g = (int*)opj_image_data_alloc(sizeof(int) * max); + d2 = b = (int*)opj_image_data_alloc(sizeof(int) * max); if (r == NULL || g == NULL || b == NULL) { goto fails; @@ -139,19 +139,19 @@ static void sycc444_to_rgb(opj_image_t *img) ++g; ++b; } - free(img->comps[0].data); + opj_image_data_free(img->comps[0].data); img->comps[0].data = d0; - free(img->comps[1].data); + opj_image_data_free(img->comps[1].data); img->comps[1].data = d1; - free(img->comps[2].data); + opj_image_data_free(img->comps[2].data); img->comps[2].data = d2; img->color_space = OPJ_CLRSPC_SRGB; return; fails: - free(r); - free(g); - free(b); + opj_image_data_free(r); + opj_image_data_free(g); + opj_image_data_free(b); }/* sycc444_to_rgb() */ static void sycc422_to_rgb(opj_image_t *img) @@ -174,9 +174,9 @@ static void sycc422_to_rgb(opj_image_t *img) cb = img->comps[1].data; cr = img->comps[2].data; - d0 = r = (int*)malloc(sizeof(int) * max); - d1 = g = (int*)malloc(sizeof(int) * max); - d2 = b = (int*)malloc(sizeof(int) * max); + d0 = r = (int*)opj_image_data_alloc(sizeof(int) * max); + d1 = g = (int*)opj_image_data_alloc(sizeof(int) * max); + d2 = b = (int*)opj_image_data_alloc(sizeof(int) * max); if (r == NULL || g == NULL || b == NULL) { goto fails; @@ 
-222,11 +222,11 @@ static void sycc422_to_rgb(opj_image_t *img) } } - free(img->comps[0].data); + opj_image_data_free(img->comps[0].data); img->comps[0].data = d0; - free(img->comps[1].data); + opj_image_data_free(img->comps[1].data); img->comps[1].data = d1; - free(img->comps[2].data); + opj_image_data_free(img->comps[2].data); img->comps[2].data = d2; img->comps[1].w = img->comps[2].w = img->comps[0].w; @@ -237,9 +237,9 @@ static void sycc422_to_rgb(opj_image_t *img) return; fails: - free(r); - free(g); - free(b); + opj_image_data_free(r); + opj_image_data_free(g); + opj_image_data_free(b); }/* sycc422_to_rgb() */ static void sycc420_to_rgb(opj_image_t *img) @@ -262,9 +262,9 @@ static void sycc420_to_rgb(opj_image_t *img) cb = img->comps[1].data; cr = img->comps[2].data; - d0 = r = (int*)malloc(sizeof(int) * max); - d1 = g = (int*)malloc(sizeof(int) * max); - d2 = b = (int*)malloc(sizeof(int) * max); + d0 = r = (int*)opj_image_data_alloc(sizeof(int) * max); + d1 = g = (int*)opj_image_data_alloc(sizeof(int) * max); + d2 = b = (int*)opj_image_data_alloc(sizeof(int) * max); if (r == NULL || g == NULL || b == NULL) { goto fails; @@ -380,11 +380,11 @@ static void sycc420_to_rgb(opj_image_t *img) } } - free(img->comps[0].data); + opj_image_data_free(img->comps[0].data); img->comps[0].data = d0; - free(img->comps[1].data); + opj_image_data_free(img->comps[1].data); img->comps[1].data = d1; - free(img->comps[2].data); + opj_image_data_free(img->comps[2].data); img->comps[2].data = d2; img->comps[1].w = img->comps[2].w = img->comps[0].w; @@ -395,9 +395,9 @@ static void sycc420_to_rgb(opj_image_t *img) return; fails: - free(r); - free(g); - free(b); + opj_image_data_free(r); + opj_image_data_free(g); + opj_image_data_free(b); }/* sycc420_to_rgb() */ void color_sycc_to_rgb(opj_image_t *img) @@ -574,8 +574,8 @@ void color_apply_icc_profile(opj_image_t *image) max = max_w * max_h; nr_samples = (size_t)(max * 3U * sizeof(unsigned char)); - in = inbuf = (unsigned 
char*)malloc(nr_samples); - out = outbuf = (unsigned char*)malloc(nr_samples); + in = inbuf = (unsigned char*)opj_image_data_alloc(nr_samples); + out = outbuf = (unsigned char*)opj_image_data_alloc(nr_samples); if (inbuf == NULL || outbuf == NULL) { goto fails0; @@ -605,15 +605,15 @@ void color_apply_icc_profile(opj_image_t *image) ok = 1; fails0: - free(inbuf); - free(outbuf); + opj_image_data_free(inbuf); + opj_image_data_free(outbuf); } else { /* prec > 8 */ unsigned short *inbuf, *outbuf, *in, *out; max = max_w * max_h; nr_samples = (size_t)(max * 3U * sizeof(unsigned short)); - in = inbuf = (unsigned short*)malloc(nr_samples); - out = outbuf = (unsigned short*)malloc(nr_samples); + in = inbuf = (unsigned short*)opj_image_data_alloc(nr_samples); + out = outbuf = (unsigned short*)opj_image_data_alloc(nr_samples); if (inbuf == NULL || outbuf == NULL) { goto fails1; @@ -643,8 +643,8 @@ fails0: ok = 1; fails1: - free(inbuf); - free(outbuf); + opj_image_data_free(inbuf); + opj_image_data_free(outbuf); } } else { /* image->numcomps <= 2 : GRAY, GRAYA */ if (prec <= 8) { @@ -653,8 +653,8 @@ fails1: max = max_w * max_h; nr_samples = (size_t)(max * 3 * sizeof(unsigned char)); - in = inbuf = (unsigned char*)malloc(nr_samples); - out = outbuf = (unsigned char*)malloc(nr_samples); + in = inbuf = (unsigned char*)opj_image_data_alloc(nr_samples); + out = outbuf = (unsigned char*)opj_image_data_alloc(nr_samples); g = (int*)calloc((size_t)max, sizeof(int)); b = (int*)calloc((size_t)max, sizeof(int)); @@ -703,18 +703,18 @@ fails1: ok = 1; fails2: - free(inbuf); - free(outbuf); - free(g); - free(b); + opj_image_data_free(inbuf); + opj_image_data_free(outbuf); + opj_image_data_free(g); + opj_image_data_free(b); } else { /* prec > 8 */ unsigned short *in, *inbuf, *out, *outbuf; opj_image_comp_t *new_comps; max = max_w * max_h; nr_samples = (size_t)(max * 3U * sizeof(unsigned short)); - in = inbuf = (unsigned short*)malloc(nr_samples); - out = outbuf = (unsigned 
short*)malloc(nr_samples); + in = inbuf = (unsigned short*)opj_image_data_alloc(nr_samples); + out = outbuf = (unsigned short*)opj_image_data_alloc(nr_samples); g = (int*)calloc((size_t)max, sizeof(int)); b = (int*)calloc((size_t)max, sizeof(int)); @@ -763,10 +763,10 @@ fails2: ok = 1; fails3: - free(inbuf); - free(outbuf); - free(g); - free(b); + opj_image_data_free(inbuf); + opj_image_data_free(outbuf); + opj_image_data_free(g); + opj_image_data_free(b); } }/* if(image->numcomps > 2) */ @@ -881,9 +881,9 @@ void color_cielab_to_rgb(opj_image_t *image) max = image->comps[0].w * image->comps[0].h; - red = dst0 = (int*)malloc(max * sizeof(int)); - green = dst1 = (int*)malloc(max * sizeof(int)); - blue = dst2 = (int*)malloc(max * sizeof(int)); + red = dst0 = (int*)opj_image_data_alloc(max * sizeof(int)); + green = dst1 = (int*)opj_image_data_alloc(max * sizeof(int)); + blue = dst2 = (int*)opj_image_data_alloc(max * sizeof(int)); if (red == NULL || green == NULL || blue == NULL) { goto fails; @@ -917,11 +917,11 @@ void color_cielab_to_rgb(opj_image_t *image) cmsCloseProfile(in); cmsCloseProfile(out); #endif - free(src0); + opj_image_data_free(src0); image->comps[0].data = dst0; - free(src1); + opj_image_data_free(src1); image->comps[1].data = dst1; - free(src2); + opj_image_data_free(src2); image->comps[2].data = dst2; image->color_space = new_space; @@ -938,13 +938,13 @@ fails: cmsCloseProfile(out); #endif if (red) { - free(red); + opj_image_data_free(red); } if (green) { - free(green); + opj_image_data_free(green); } if (blue) { - free(blue); + opj_image_data_free(blue); } return; } @@ -1004,7 +1004,7 @@ void color_cmyk_to_rgb(opj_image_t *image) image->comps[2].data[i] = (int)(255.0F * Y * K); /* B */ } - free(image->comps[3].data); + opj_image_data_free(image->comps[3].data); image->comps[3].data = NULL; image->comps[0].prec = 8; image->comps[1].prec = 8; diff --git a/src/lib/openjp2/image.c b/src/lib/openjp2/image.c index ecd65ece..e29172b2 100644 --- 
a/src/lib/openjp2/image.c +++ b/src/lib/openjp2/image.c @@ -74,7 +74,7 @@ opj_image_t* OPJ_CALLCONV opj_image_create(OPJ_UINT32 numcmpts, opj_image_destroy(image); return NULL; } - comp->data = (OPJ_INT32*) opj_aligned_malloc( + comp->data = (OPJ_INT32*) opj_image_data_alloc( (size_t)comp->w * comp->h * sizeof(OPJ_INT32)); if (!comp->data) { /* TODO replace with event manager, breaks API */ @@ -99,7 +99,7 @@ void OPJ_CALLCONV opj_image_destroy(opj_image_t *image) for (compno = 0; compno < image->numcomps; compno++) { opj_image_comp_t *image_comp = &(image->comps[compno]); if (image_comp->data) { - opj_aligned_free(image_comp->data); + opj_image_data_free(image_comp->data); } } opj_free(image->comps); diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index afbcc9c5..9de69cec 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -8803,7 +8803,7 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, OPJ_BYTE * p_data, /* would overflow */ return OPJ_FALSE; } - l_img_comp_dest->data = (OPJ_INT32*) opj_aligned_malloc(l_width * l_height * + l_img_comp_dest->data = (OPJ_INT32*) opj_image_data_alloc(l_width * l_height * sizeof(OPJ_INT32)); if (! 
l_img_comp_dest->data) { return OPJ_FALSE; @@ -10419,7 +10419,7 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, /* Transfer TCD data to output image data */ for (i = 0; i < p_j2k->m_output_image->numcomps; i++) { - opj_aligned_free(p_j2k->m_output_image->comps[i].data); + opj_image_data_free(p_j2k->m_output_image->comps[i].data); p_j2k->m_output_image->comps[i].data = p_j2k->m_tcd->tcd_image->tiles->comps[i].data; p_j2k->m_output_image->comps[i].resno_decoded = @@ -10824,7 +10824,7 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k, p_j2k->m_output_image->comps[compno].resno_decoded; if (p_image->comps[compno].data) { - opj_aligned_free(p_image->comps[compno].data); + opj_image_data_free(p_image->comps[compno].data); } p_image->comps[compno].data = p_j2k->m_output_image->comps[compno].data; diff --git a/src/lib/openjp2/openjpeg.c b/src/lib/openjp2/openjpeg.c index 0071c03d..8293a58d 100644 --- a/src/lib/openjp2/openjpeg.c +++ b/src/lib/openjp2/openjpeg.c @@ -1007,3 +1007,14 @@ opj_stream_t* OPJ_CALLCONV opj_stream_create_file_stream( return l_stream; } + + +void* OPJ_CALLCONV opj_image_data_alloc(OPJ_SIZE_T size) +{ + return opj_aligned_malloc(size); +} + +void OPJ_CALLCONV opj_image_data_free(void* ptr) +{ + opj_aligned_free(ptr); +} diff --git a/src/lib/openjp2/openjpeg.h b/src/lib/openjp2/openjpeg.h index 0aea383b..5fbf7ac3 100644 --- a/src/lib/openjp2/openjpeg.h +++ b/src/lib/openjp2/openjpeg.h @@ -1093,6 +1093,27 @@ OPJ_API void OPJ_CALLCONV opj_image_destroy(opj_image_t *image); OPJ_API opj_image_t* OPJ_CALLCONV opj_image_tile_create(OPJ_UINT32 numcmpts, opj_image_cmptparm_t *cmptparms, OPJ_COLOR_SPACE clrspc); +/** + * Allocator for opj_image_t->comps[].data + * To be paired with opj_image_data_free. + * + * @param size number of bytes to allocate + * + * @return a new pointer if successful, NULL otherwise. 
+ * @since 2.2.0 +*/ +OPJ_API void* OPJ_CALLCONV opj_image_data_alloc(OPJ_SIZE_T size); + +/** + * Destructor for opj_image_t->comps[].data + * To be paired with opj_image_data_alloc. + * + * @param ptr Pointer to free + * + * @since 2.2.0 +*/ +OPJ_API void OPJ_CALLCONV opj_image_data_free(void* ptr); + /* ========================================================== stream functions definitions diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 1213bffe..2189dcc3 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -674,7 +674,7 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) if ((l_tilec->data == 00) || ((l_tilec->data_size_needed > l_tilec->data_size) && (l_tilec->ownsData == OPJ_FALSE))) { - l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed); + l_tilec->data = (OPJ_INT32 *) opj_image_data_alloc(l_tilec->data_size_needed); if (! l_tilec->data) { return OPJ_FALSE; } @@ -683,8 +683,8 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) l_tilec->ownsData = OPJ_TRUE; } else if (l_tilec->data_size_needed > l_tilec->data_size) { /* We don't need to keep old data */ - opj_aligned_free(l_tilec->data); - l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed); + opj_image_data_free(l_tilec->data); + l_tilec->data = (OPJ_INT32 *) opj_image_data_alloc(l_tilec->data_size_needed); if (! 
l_tilec->data) { l_tilec->data_size = 0; l_tilec->data_size_needed = 0; @@ -1645,7 +1645,7 @@ static void opj_tcd_free_tile(opj_tcd_t *p_tcd) } if (l_tile_comp->ownsData && l_tile_comp->data) { - opj_aligned_free(l_tile_comp->data); + opj_image_data_free(l_tile_comp->data); l_tile_comp->data = 00; l_tile_comp->ownsData = 0; l_tile_comp->data_size = 0; From 0c1fc0593ef5b918068709fd74d76e4824c90e7c Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Jul 2017 02:02:25 +0200 Subject: [PATCH 4/8] Complementary fix to previous commit --- src/bin/common/color.c | 8 ++++---- src/lib/openjp2/openjpeg.c | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/bin/common/color.c b/src/bin/common/color.c index 665558f5..185b547e 100644 --- a/src/bin/common/color.c +++ b/src/bin/common/color.c @@ -655,8 +655,8 @@ fails1: nr_samples = (size_t)(max * 3 * sizeof(unsigned char)); in = inbuf = (unsigned char*)opj_image_data_alloc(nr_samples); out = outbuf = (unsigned char*)opj_image_data_alloc(nr_samples); - g = (int*)calloc((size_t)max, sizeof(int)); - b = (int*)calloc((size_t)max, sizeof(int)); + g = (int*)opj_image_data_alloc((size_t)max * sizeof(int)); + b = (int*)opj_image_data_alloc((size_t)max * sizeof(int)); if (inbuf == NULL || outbuf == NULL || g == NULL || b == NULL) { goto fails2; @@ -715,8 +715,8 @@ fails2: nr_samples = (size_t)(max * 3U * sizeof(unsigned short)); in = inbuf = (unsigned short*)opj_image_data_alloc(nr_samples); out = outbuf = (unsigned short*)opj_image_data_alloc(nr_samples); - g = (int*)calloc((size_t)max, sizeof(int)); - b = (int*)calloc((size_t)max, sizeof(int)); + g = (int*)opj_image_data_alloc((size_t)max * sizeof(int)); + b = (int*)opj_image_data_alloc((size_t)max * sizeof(int)); if (inbuf == NULL || outbuf == NULL || g == NULL || b == NULL) { goto fails3; diff --git a/src/lib/openjp2/openjpeg.c b/src/lib/openjp2/openjpeg.c index 8293a58d..4e649a74 100644 --- a/src/lib/openjp2/openjpeg.c +++ b/src/lib/openjp2/openjpeg.c @@ 
-1011,10 +1011,13 @@ opj_stream_t* OPJ_CALLCONV opj_stream_create_file_stream( void* OPJ_CALLCONV opj_image_data_alloc(OPJ_SIZE_T size) { - return opj_aligned_malloc(size); + void* ret = opj_aligned_malloc(size); + /* printf("opj_image_data_alloc %p\n", ret); */ + return ret; } void OPJ_CALLCONV opj_image_data_free(void* ptr) { + /* printf("opj_image_data_free %p\n", ptr); */ opj_aligned_free(ptr); } From 434ace4ff74cc3eee401d4d3c02668c1d85f2a0d Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Jul 2017 02:17:26 +0200 Subject: [PATCH 5/8] opj_jp2_apply_pclr() also needs to use opj_image_data_alloc/opj_image_data_free --- src/lib/openjp2/jp2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/openjp2/jp2.c b/src/lib/openjp2/jp2.c index 1211ab5d..33a10d30 100644 --- a/src/lib/openjp2/jp2.c +++ b/src/lib/openjp2/jp2.c @@ -1065,7 +1065,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, /* Palette mapping: */ new_comps[i].data = (OPJ_INT32*) - opj_malloc(old_comps[cmp].w * old_comps[cmp].h * sizeof(OPJ_INT32)); + opj_image_data_alloc(old_comps[cmp].w * old_comps[cmp].h * sizeof(OPJ_INT32)); if (!new_comps[i].data) { while (i > 0) { -- i; @@ -1119,7 +1119,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, max = image->numcomps; for (i = 0; i < max; ++i) { if (old_comps[i].data) { - opj_free(old_comps[i].data); + opj_image_data_free(old_comps[i].data); } } From 373520db309430b68dd9ff09cba03a25f711a88e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Jul 2017 13:23:29 +0200 Subject: [PATCH 6/8] Add documentation for magic values in the code --- src/lib/openjp2/t2.c | 4 ++++ src/lib/openjp2/tcd.c | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c index 93b77ef0..760e17ac 100644 --- a/src/lib/openjp2/t2.c +++ b/src/lib/openjp2/t2.c @@ -1491,6 +1491,10 @@ static OPJ_BOOL opj_t2_init_seg(opj_tcd_cblk_dec_t* cblk, ((seg - 1)->maxpasses == 10)) ? 
2 : 1; } } else { + /* See paragraph "B.10.6 Number of coding passes" of the standard. + * Probably that 109 must be interpreted a (Mb-1)*3 + 1 with Mb=37, + * Mb being the maximum number of bit-planes available for the + * representation of coefficients in the sub-band */ seg->maxpasses = 109; } diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 2189dcc3..53e4ded3 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -965,8 +965,10 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, numbps = (OPJ_INT32)(l_image_comp->prec + l_gain); l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, (OPJ_INT32)(numbps - l_step_size->expn)))) * fraction; + /* Mb value of Equation E-2 in "E.1 Inverse quantization + * procedure" of the standard */ l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits - - 1; /* WHY -1 ? */ + 1; if (!l_band->precincts && (l_nb_precincts > 0U)) { l_band->precincts = (opj_tcd_precinct_t *) opj_malloc(/*3 * */ From ca34d13e76a588a00171e57690c1deeaf068723a Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Jul 2017 16:11:11 +0200 Subject: [PATCH 7/8] Decoding: do not allocate memory for the codestream of each codeblock Currently we allocate at least 8192 bytes for each codeblock, and copy the relevant parts of the codestream in that per-codeblock buffer as we decode packets. As the whole codestream for the tile is ingested in memory and alive during the decoding, we can directly point to it instead of copying. But to do that, we need an intermediate concept, a 'chunk' of code-stream segment, given that segments may be made of data at different places in the code-stream when quality layers are used. With that change, the decoding of MAPA_005.jp2 goes down from the previous improvement of 2.7 GB down to 1.9 GB. New profile: n4: 1885648469 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. 
n1: 1610689344 0x4E78287: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D7B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E7272C: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BDD9: opj_j2k_read_tile_header (j2k.c:8618) n1: 1610689344 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 1610689344 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 1610689344 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BBF0: opj_j2k_read_tile_header (j2k.c:4685) n1: 219232541 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 219232541 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 219232541 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 39822000 0x4E727A9: opj_tcd_init_decode_tile (tcd.c:1219) n1: 39822000 0x4E4BDD9: opj_j2k_read_tile_header (j2k.c:8618) n1: 39822000 0x4E4C8A2: opj_j2k_decode_tiles (j2k.c:10349) n1: 39822000 0x4E4E36E: opj_j2k_decode (j2k.c:7847) n1: 39822000 0x4E52FA2: opj_jp2_decode (jp2.c:1564) n0: 39822000 0x40374E: main (opj_decompress.c:1459) n0: 15904584 in 52 places, all below massif's threshold (1.00%) --- src/lib/openjp2/j2k.c | 26 +++++++++++++-- src/lib/openjp2/opj_common.h | 1 - src/lib/openjp2/t1.c | 50 ++++++++++++++++++++++++++--- src/lib/openjp2/t1.h | 6 ++++ src/lib/openjp2/t2.c | 59 +++++++++++++--------------------- src/lib/openjp2/tcd.c | 36 +++++++++++---------- src/lib/openjp2/tcd.h | 61 +++++++++++++++++++++++------------- 7 files changed, 154 insertions(+), 85 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 9de69cec..3737655b 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -4684,15 +4684,35 @@ static OPJ_BOOL opj_j2k_read_sod(opj_j2k_t *p_j2k, "Tile part length size inconsistent with stream length\n"); return OPJ_FALSE; } + if (p_j2k->m_specific_param.m_decoder.m_sot_length > + UINT_MAX - OPJ_COMMON_CBLK_DATA_EXTRA) { + opj_event_msg(p_manager, EVT_ERROR, + 
"p_j2k->m_specific_param.m_decoder.m_sot_length > " + "UINT_MAX - OPJ_COMMON_CBLK_DATA_EXTRA"); + return OPJ_FALSE; + } + /* Add a margin of OPJ_COMMON_CBLK_DATA_EXTRA to the allocation we */ + /* do so that opj_mqc_init_dec_common() can safely add a synthetic */ + /* 0xFFFF marker. */ if (! *l_current_data) { /* LH: oddly enough, in this path, l_tile_len!=0. * TODO: If this was consistent, we could simplify the code to only use realloc(), as realloc(0,...) default to malloc(0,...). */ *l_current_data = (OPJ_BYTE*) opj_malloc( - p_j2k->m_specific_param.m_decoder.m_sot_length); + p_j2k->m_specific_param.m_decoder.m_sot_length + OPJ_COMMON_CBLK_DATA_EXTRA); } else { - OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(*l_current_data, - *l_tile_len + p_j2k->m_specific_param.m_decoder.m_sot_length); + OPJ_BYTE *l_new_current_data; + if (*l_tile_len > UINT_MAX - OPJ_COMMON_CBLK_DATA_EXTRA - + p_j2k->m_specific_param.m_decoder.m_sot_length) { + opj_event_msg(p_manager, EVT_ERROR, + "*l_tile_len > UINT_MAX - OPJ_COMMON_CBLK_DATA_EXTRA - " + "p_j2k->m_specific_param.m_decoder.m_sot_length"); + return OPJ_FALSE; + } + + l_new_current_data = (OPJ_BYTE *) opj_realloc(*l_current_data, + *l_tile_len + p_j2k->m_specific_param.m_decoder.m_sot_length + + OPJ_COMMON_CBLK_DATA_EXTRA); if (! 
l_new_current_data) { opj_free(*l_current_data); /*nothing more is done as l_current_data will be set to null, and just diff --git a/src/lib/openjp2/opj_common.h b/src/lib/openjp2/opj_common.h index 8db83fc5..a0513391 100644 --- a/src/lib/openjp2/opj_common.h +++ b/src/lib/openjp2/opj_common.h @@ -36,7 +36,6 @@ Common constants shared among several modules ========================================================== */ -#define OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE 8192 #define OPJ_COMMON_CBLK_DATA_EXTRA 2 /**< Margin for a fake FFFF marker */ #endif /* OPJ_COMMMON_H */ diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 800b6ed4..f932599b 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1604,6 +1604,8 @@ void opj_t1_destroy(opj_t1_t *p_t1) p_t1->flags = 00; } + opj_free(p_t1->segdatabuffer); + opj_free(p_t1); } @@ -1613,6 +1615,7 @@ typedef struct { opj_tcd_band_t* band; opj_tcd_tilecomp_t* tilec; opj_tccp_t* tccp; + OPJ_BOOL mustuse_segdatabuffer; volatile OPJ_BOOL* pret; opj_event_mgr_t *p_manager; opj_mutex_t* p_manager_mutex; @@ -1657,6 +1660,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) t1 = opj_t1_create(OPJ_FALSE); opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); } + t1->mustuse_segdatabuffer = job->mustuse_segdatabuffer; if (OPJ_FALSE == opj_t1_decode_cblk( t1, @@ -1786,6 +1790,7 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp, job->p_manager_mutex = p_manager_mutex; job->p_manager = p_manager; job->check_pterm = check_pterm; + job->mustuse_segdatabuffer = opj_thread_pool_get_thread_count(tp) > 1; opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job); if (!(*pret)) { return; @@ -1846,19 +1851,54 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, for (segno = 0; segno < cblk->real_num_segs; ++segno) { opj_tcd_seg_t *seg = &cblk->segs[segno]; + OPJ_BYTE* segdata; + OPJ_UINT32 seglen; /* BYPASS mode */ type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) 
&& (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ; - /* FIXME: slviewer gets here with a null pointer. Why? Partially downloaded and/or corrupt textures? */ - if (seg->data == 00) { - continue; + + /* Even if we have a single chunk, in mulithtreaded decoding */ + /* the insertion of our synthetic marker might potentially override */ + /* valid codestream of other codeblocks decoded in parallel. */ + if (seg->numchunks == 1 && !(t1->mustuse_segdatabuffer)) { + segdata = seg->chunks[0].data; + seglen = seg->chunks[0].len; + } else { + OPJ_UINT32 i; + + /* Compute whole segment length from chunk lengths */ + seglen = 0; + for (i = 0; i < seg->numchunks; i++) { + seglen += seg->chunks[i].len; + } + + /* Allocate temporary memory if needed */ + if (seglen + OPJ_COMMON_CBLK_DATA_EXTRA > t1->segdatabuffersize) { + segdata = (OPJ_BYTE*)opj_realloc(t1->segdatabuffer, + seglen + OPJ_COMMON_CBLK_DATA_EXTRA); + if (segdata == NULL) { + return OPJ_FALSE; + } + t1->segdatabuffer = segdata; + memset(t1->segdatabuffer + seglen, 0, OPJ_COMMON_CBLK_DATA_EXTRA); + t1->segdatabuffersize = seglen + OPJ_COMMON_CBLK_DATA_EXTRA; + } + + /* Concatenate all segments chunks */ + segdata = t1->segdatabuffer; + seglen = 0; + for (i = 0; i < seg->numchunks; i++) { + memcpy(segdata + seglen, seg->chunks[i].data, seg->chunks[i].len); + seglen += seg->chunks[i].len; + } } + if (type == T1_TYPE_RAW) { - opj_mqc_raw_init_dec(mqc, (*seg->data) + seg->dataindex, seg->len, + opj_mqc_raw_init_dec(mqc, segdata, seglen, OPJ_COMMON_CBLK_DATA_EXTRA); } else { - opj_mqc_init_dec(mqc, (*seg->data) + seg->dataindex, seg->len, + opj_mqc_init_dec(mqc, segdata, seglen, OPJ_COMMON_CBLK_DATA_EXTRA); } diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 6802d188..da8b0c80 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -200,6 +200,12 @@ typedef struct opj_t1 { OPJ_UINT32 flagssize; OPJ_UINT32 data_stride; OPJ_BOOL encoder; + + /* Thre 3 variables below are only used by the 
decoder */ + OPJ_BOOL mustuse_segdatabuffer; /* set to TRUE in multithreaded context */ + OPJ_BYTE + *segdatabuffer; /* Temporary buffer to concatenate all chunks of a segment */ + OPJ_UINT32 segdatabuffersize; /* Maximum size available in segdatabuffer */ } opj_t1_t; /** @name Exported functions */ diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c index 760e17ac..5d5e33ea 100644 --- a/src/lib/openjp2/t2.c +++ b/src/lib/openjp2/t2.c @@ -1245,7 +1245,6 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, if (!l_cblk->numsegs) { l_seg = l_cblk->segs; ++l_cblk->numsegs; - l_cblk->data_current_size = 0; } else { l_seg = &l_cblk->segs[l_cblk->numsegs - 1]; @@ -1287,46 +1286,30 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, }; #endif /* USE_JPWL */ - /* Check possible overflow on size */ - if ((l_cblk->data_current_size + l_seg->newlen + OPJ_COMMON_CBLK_DATA_EXTRA) < - l_cblk->data_current_size) { - opj_event_msg(p_manager, EVT_ERROR, - "read: segment too long (%d) with current size (%d > %d) for codeblock %d (p=%d, b=%d, r=%d, c=%d)\n", - l_seg->newlen, l_cblk->data_current_size, 0xFFFFFFFF - l_seg->newlen, cblkno, - p_pi->precno, bandno, p_pi->resno, p_pi->compno); - return OPJ_FALSE; - } - /* Check if the cblk->data have allocated enough memory */ - if ((l_cblk->data_current_size + l_seg->newlen + OPJ_COMMON_CBLK_DATA_EXTRA) > - l_cblk->data_max_size) { - OPJ_BYTE* new_cblk_data = (OPJ_BYTE*) opj_realloc(l_cblk->data, - l_cblk->data_current_size + l_seg->newlen + OPJ_COMMON_CBLK_DATA_EXTRA); - if (! 
new_cblk_data) { - opj_free(l_cblk->data); - l_cblk->data = NULL; - l_cblk->data_max_size = 0; - /* opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to realloc code block cata!\n"); */ + + if (l_seg->numchunks == l_seg->numchunksalloc) { + OPJ_UINT32 l_numchunksalloc = l_seg->numchunksalloc * 2 + 1; + opj_tcd_seg_data_chunk_t* l_chunks = + (opj_tcd_seg_data_chunk_t*)opj_realloc(l_seg->chunks, + l_numchunksalloc * sizeof(opj_tcd_seg_data_chunk_t)); + if (l_chunks == NULL) { + opj_event_msg(p_manager, EVT_ERROR, + "cannot allocate opj_tcd_seg_data_chunk_t* array"); return OPJ_FALSE; } - l_cblk->data_max_size = l_cblk->data_current_size + l_seg->newlen + - OPJ_COMMON_CBLK_DATA_EXTRA; - l_cblk->data = new_cblk_data; + l_seg->chunks = l_chunks; + l_seg->numchunksalloc = l_numchunksalloc; } - memcpy(l_cblk->data + l_cblk->data_current_size, l_current_data, l_seg->newlen); - - if (l_seg->numpasses == 0) { - l_seg->data = &l_cblk->data; - l_seg->dataindex = l_cblk->data_current_size; - } + l_seg->chunks[l_seg->numchunks].data = l_current_data; + l_seg->chunks[l_seg->numchunks].len = l_seg->newlen; + l_seg->numchunks ++; l_current_data += l_seg->newlen; l_seg->numpasses += l_seg->numnewpasses; l_cblk->numnewpasses -= l_seg->numnewpasses; l_seg->real_num_passes = l_seg->numpasses; - l_cblk->data_current_size += l_seg->newlen; - l_seg->len += l_seg->newlen; if (l_cblk->numnewpasses > 0) { ++l_seg; @@ -1391,7 +1374,6 @@ static OPJ_BOOL opj_t2_skip_packet_data(opj_t2_t* p_t2, if (!l_cblk->numsegs) { l_seg = l_cblk->segs; ++l_cblk->numsegs; - l_cblk->data_current_size = 0; } else { l_seg = &l_cblk->segs[l_cblk->numsegs - 1]; @@ -1464,22 +1446,23 @@ static OPJ_BOOL opj_t2_init_seg(opj_tcd_cblk_dec_t* cblk, if (l_nb_segs > cblk->m_current_max_segs) { opj_tcd_seg_t* new_segs; - cblk->m_current_max_segs += OPJ_J2K_DEFAULT_NB_SEGS; + OPJ_UINT32 l_m_current_max_segs = cblk->m_current_max_segs + + OPJ_J2K_DEFAULT_NB_SEGS; new_segs = (opj_tcd_seg_t*) opj_realloc(cblk->segs, - 
cblk->m_current_max_segs * sizeof(opj_tcd_seg_t)); + l_m_current_max_segs * sizeof(opj_tcd_seg_t)); if (! new_segs) { - opj_free(cblk->segs); - cblk->segs = NULL; - cblk->m_current_max_segs = 0; /* opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to initialize segment %d\n", l_nb_segs); */ return OPJ_FALSE; } cblk->segs = new_segs; + memset(new_segs + cblk->m_current_max_segs, + 0, OPJ_J2K_DEFAULT_NB_SEGS * sizeof(opj_tcd_seg_t)); + cblk->m_current_max_segs = l_m_current_max_segs; } seg = &cblk->segs[index]; - memset(seg, 0, sizeof(opj_tcd_seg_t)); + opj_tcd_reinit_segment(seg); if (cblksty & J2K_CCP_CBLKSTY_TERMALL) { seg->maxpasses = 1; diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 53e4ded3..4c4839d0 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -1210,20 +1210,23 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t * return OPJ_TRUE; } + +void opj_tcd_reinit_segment(opj_tcd_seg_t* seg) +{ + opj_tcd_seg_data_chunk_t *l_chunks = seg->chunks; + OPJ_UINT32 l_numchunksalloc = seg->numchunksalloc; + memset(seg, 0, sizeof(opj_tcd_seg_t)); + seg->chunks = l_chunks; + seg->numchunksalloc = l_numchunksalloc; +} + /** * Allocates memory for a decoding code block. */ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * p_code_block) { - if (! p_code_block->data) { - - p_code_block->data = (OPJ_BYTE*) opj_malloc(OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE); - if (! p_code_block->data) { - return OPJ_FALSE; - } - p_code_block->data_max_size = OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE; - /*fprintf(stderr, "Allocate 8192 elements of code_block->data\n");*/ + if (! 
p_code_block->segs) { p_code_block->segs = (opj_tcd_seg_t *) opj_calloc(OPJ_J2K_DEFAULT_NB_SEGS, sizeof(opj_tcd_seg_t)); @@ -1236,16 +1239,16 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * /*fprintf(stderr, "m_current_max_segs of code_block->data = %d\n", p_code_block->m_current_max_segs);*/ } else { /* sanitize */ - OPJ_BYTE* l_data = p_code_block->data; - OPJ_UINT32 l_data_max_size = p_code_block->data_max_size; opj_tcd_seg_t * l_segs = p_code_block->segs; OPJ_UINT32 l_current_max_segs = p_code_block->m_current_max_segs; + OPJ_UINT32 i; memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t)); - p_code_block->data = l_data; - p_code_block->data_max_size = l_data_max_size; p_code_block->segs = l_segs; p_code_block->m_current_max_segs = l_current_max_segs; + for (i = 0; i < l_current_max_segs; ++i) { + opj_tcd_reinit_segment(&l_segs[i]); + } } return OPJ_TRUE; @@ -1948,12 +1951,11 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct) for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) { - if (l_code_block->data) { - opj_free(l_code_block->data); - l_code_block->data = 00; - } - if (l_code_block->segs) { + OPJ_UINT32 i; + for (i = 0; i < l_code_block->m_current_max_segs; ++ i) { + opj_free(l_code_block->segs[i].chunks); + } opj_free(l_code_block->segs); l_code_block->segs = 00; } diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index cd750d1b..0de888d5 100644 --- a/src/lib/openjp2/tcd.h +++ b/src/lib/openjp2/tcd.h @@ -49,19 +49,6 @@ each other. The functions in TCD.C are used by other functions in J2K.C. 
/** @defgroup TCD TCD - Implementation of a tile coder/decoder */ /*@{*/ -/** -FIXME DOC -*/ -typedef struct opj_tcd_seg { - OPJ_BYTE ** data; - OPJ_UINT32 dataindex; - OPJ_UINT32 numpasses; - OPJ_UINT32 real_num_passes; - OPJ_UINT32 len; - OPJ_UINT32 maxpasses; - OPJ_UINT32 numnewpasses; - OPJ_UINT32 newlen; -} opj_tcd_seg_t; /** FIXME DOC @@ -102,19 +89,48 @@ typedef struct opj_tcd_cblk_enc { } opj_tcd_cblk_enc_t; +/** Chunk of codestream data that is part of a T1 segment */ +typedef struct opj_tcd_seg_data_chunk { + OPJ_BYTE * + data; /* Point to tilepart buffer. We don't make a copy ! + So the tilepart buffer must be kept alive + as long as we need to decode the codeblocks */ + OPJ_UINT32 len; /* Usable length of data */ +} opj_tcd_seg_data_chunk_t; + +/** Segment of a code-block. + * A segment represent a number of consecutive coding passes, without termination + * of MQC or RAW between them. */ +typedef struct opj_tcd_seg { + opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */ + OPJ_UINT32 numchunks; /* Number of valid chunks items */ + OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */ + OPJ_UINT32 + numpasses; /* Number of passes decoded. Including those that we skip */ + OPJ_UINT32 + real_num_passes; /* Number of passes actually to be decoded. To be used for code-block decoding */ + OPJ_UINT32 maxpasses; /* Maximum number of passes for this segment */ + OPJ_UINT32 + numnewpasses; /* Number of new passes for current packed. Transitory value */ + OPJ_UINT32 + newlen; /* Codestream length for this segment for current packed. 
Transitory value */ +} opj_tcd_seg_t; + +/* Code-block for decoding */ typedef struct opj_tcd_cblk_dec { - OPJ_BYTE * data; /* Data */ opj_tcd_seg_t* segs; /* segments information */ OPJ_INT32 x0, y0, x1, y1; /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */ OPJ_UINT32 numbps; - OPJ_UINT32 numlenbits; - OPJ_UINT32 data_max_size; /* Size of allocated data buffer */ - OPJ_UINT32 data_current_size; /* Size of used data buffer */ - OPJ_UINT32 numnewpasses; /* number of pass added to the code-blocks */ - OPJ_UINT32 numsegs; /* number of segments */ - OPJ_UINT32 real_num_segs; - OPJ_UINT32 m_current_max_segs; + OPJ_UINT32 + numlenbits; /* number of bits for len,, for the current packet. Transitory value */ + OPJ_UINT32 + numnewpasses; /* number of pass added to the code-blocks, for the current packet. Transitory value */ + OPJ_UINT32 + numsegs; /* number of segments, including those of packet we skip */ + OPJ_UINT32 + real_num_segs; /* number of segments, to be used for code block decoding */ + OPJ_UINT32 m_current_max_segs; /* allocated number of segs[] items */ } opj_tcd_cblk_dec_t; /** @@ -381,6 +397,9 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec); */ OPJ_BOOL opj_tcd_is_band_empty(opj_tcd_band_t* band); +/** Reinitialize a segment, without deallocating its chunks array */ +void opj_tcd_reinit_segment(opj_tcd_seg_t* seg); + /* ----------------------------------------------------------------------- */ /*@}*/ From 92114694a48638e86c07e4adf11b78878045c8a9 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 6 Jul 2017 19:34:21 +0200 Subject: [PATCH 8/8] Slight improvement in management of code block chunks Instead of having the chunk array at the segment level, we can move it up to the code block itself, since segments are filled in sequential order. This limits the number of memory allocations and slightly decreases the memory usage.
On MAPA_005.jp2 n4: 1871312549 (heap allocation functions) malloc/new/new[], --alloc-fns, etc. n1: 1610689344 0x4E781E7: opj_aligned_malloc (opj_malloc.c:61) n1: 1610689344 0x4E71D1B: opj_alloc_tile_component_data (tcd.c:676) n1: 1610689344 0x4E726CF: opj_tcd_init_decode_tile (tcd.c:816) n1: 1610689344 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 1610689344 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 1610689344 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 1610689344 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 1610689344 0x40374E: main (opj_decompress.c:1459) n1: 219232541 0x4E4BC50: opj_j2k_read_tile_header (j2k.c:4683) n1: 219232541 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 219232541 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 219232541 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 219232541 0x40374E: main (opj_decompress.c:1459) n1: 23893200 0x4E72735: opj_tcd_init_decode_tile (tcd.c:1225) n1: 23893200 0x4E4BE39: opj_j2k_read_tile_header (j2k.c:8617) n1: 23893200 0x4E4C902: opj_j2k_decode_tiles (j2k.c:10348) n1: 23893200 0x4E4E3CE: opj_j2k_decode (j2k.c:7846) n1: 23893200 0x4E53002: opj_jp2_decode (jp2.c:1564) n0: 23893200 0x40374E: main (opj_decompress.c:1459) n0: 17497464 in 52 places, all below massif's threshold (1.00%) --- src/lib/openjp2/t1.c | 89 ++++++++++++++++++++++--------------------- src/lib/openjp2/t1.h | 10 +++-- src/lib/openjp2/t2.c | 17 +++++---- src/lib/openjp2/tcd.c | 17 +++++---- src/lib/openjp2/tcd.h | 58 ++++++++++++++-------------- 5 files changed, 99 insertions(+), 92 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index f932599b..bd1a8800 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1604,7 +1604,7 @@ void opj_t1_destroy(opj_t1_t *p_t1) p_t1->flags = 00; } - opj_free(p_t1->segdatabuffer); + opj_free(p_t1->cblkdatabuffer); opj_free(p_t1); } @@ -1615,7 +1615,7 @@ typedef struct { opj_tcd_band_t* band; opj_tcd_tilecomp_t* tilec; opj_tccp_t* tccp; - OPJ_BOOL mustuse_segdatabuffer; + 
OPJ_BOOL mustuse_cblkdatabuffer; volatile OPJ_BOOL* pret; opj_event_mgr_t *p_manager; opj_mutex_t* p_manager_mutex; @@ -1660,7 +1660,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) t1 = opj_t1_create(OPJ_FALSE); opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); } - t1->mustuse_segdatabuffer = job->mustuse_segdatabuffer; + t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer; if (OPJ_FALSE == opj_t1_decode_cblk( t1, @@ -1790,7 +1790,7 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp, job->p_manager_mutex = p_manager_mutex; job->p_manager = p_manager; job->check_pterm = check_pterm; - job->mustuse_segdatabuffer = opj_thread_pool_get_thread_count(tp) > 1; + job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1; opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job); if (!(*pret)) { return; @@ -1818,6 +1818,8 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, OPJ_INT32 bpno_plus_one; OPJ_UINT32 passtype; OPJ_UINT32 segno, passno; + OPJ_BYTE* cblkdata = NULL; + OPJ_UINT32 cblkdataindex = 0; OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */ mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); @@ -1849,58 +1851,57 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); + /* Even if we have a single chunk, in multi-threaded decoding */ + /* the insertion of our synthetic marker might potentially override */ + /* valid codestream of other codeblocks decoded in parallel. 
*/ + if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) { + OPJ_UINT32 i; + OPJ_UINT32 cblk_len; + + /* Compute whole codeblock length from chunk lengths */ + cblk_len = 0; + for (i = 0; i < cblk->numchunks; i++) { + cblk_len += cblk->chunks[i].len; + } + + /* Allocate temporary memory if needed */ + if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) { + cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer, + cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA); + if (cblkdata == NULL) { + return OPJ_FALSE; + } + t1->cblkdatabuffer = cblkdata; + memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA); + t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA; + } + + /* Concatenate all chunks */ + cblkdata = t1->cblkdatabuffer; + cblk_len = 0; + for (i = 0; i < cblk->numchunks; i++) { + memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len); + cblk_len += cblk->chunks[i].len; + } + } else if (cblk->numchunks == 1) { + cblkdata = cblk->chunks[0].data; + } + for (segno = 0; segno < cblk->real_num_segs; ++segno) { opj_tcd_seg_t *seg = &cblk->segs[segno]; - OPJ_BYTE* segdata; - OPJ_UINT32 seglen; /* BYPASS mode */ type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) && (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ; - /* Even if we have a single chunk, in mulithtreaded decoding */ - /* the insertion of our synthetic marker might potentially override */ - /* valid codestream of other codeblocks decoded in parallel. 
*/ - if (seg->numchunks == 1 && !(t1->mustuse_segdatabuffer)) { - segdata = seg->chunks[0].data; - seglen = seg->chunks[0].len; - } else { - OPJ_UINT32 i; - - /* Compute whole segment length from chunk lengths */ - seglen = 0; - for (i = 0; i < seg->numchunks; i++) { - seglen += seg->chunks[i].len; - } - - /* Allocate temporary memory if needed */ - if (seglen + OPJ_COMMON_CBLK_DATA_EXTRA > t1->segdatabuffersize) { - segdata = (OPJ_BYTE*)opj_realloc(t1->segdatabuffer, - seglen + OPJ_COMMON_CBLK_DATA_EXTRA); - if (segdata == NULL) { - return OPJ_FALSE; - } - t1->segdatabuffer = segdata; - memset(t1->segdatabuffer + seglen, 0, OPJ_COMMON_CBLK_DATA_EXTRA); - t1->segdatabuffersize = seglen + OPJ_COMMON_CBLK_DATA_EXTRA; - } - - /* Concatenate all segments chunks */ - segdata = t1->segdatabuffer; - seglen = 0; - for (i = 0; i < seg->numchunks; i++) { - memcpy(segdata + seglen, seg->chunks[i].data, seg->chunks[i].len); - seglen += seg->chunks[i].len; - } - } - if (type == T1_TYPE_RAW) { - opj_mqc_raw_init_dec(mqc, segdata, seglen, + opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len, OPJ_COMMON_CBLK_DATA_EXTRA); } else { - opj_mqc_init_dec(mqc, segdata, seglen, + opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len, OPJ_COMMON_CBLK_DATA_EXTRA); } + cblkdataindex += seg->len; for (passno = 0; (passno < seg->real_num_passes) && (bpno_plus_one >= 1); ++passno) { diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index da8b0c80..5aa6a07b 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -202,10 +202,12 @@ typedef struct opj_t1 { OPJ_BOOL encoder; /* Thre 3 variables below are only used by the decoder */ - OPJ_BOOL mustuse_segdatabuffer; /* set to TRUE in multithreaded context */ - OPJ_BYTE - *segdatabuffer; /* Temporary buffer to concatenate all chunks of a segment */ - OPJ_UINT32 segdatabuffersize; /* Maximum size available in segdatabuffer */ + /* set to TRUE in multithreaded context */ + OPJ_BOOL mustuse_cblkdatabuffer; + /* Temporary 
buffer to concatenate all chunks of a codeblock */ + OPJ_BYTE *cblkdatabuffer; + /* Maximum size available in cblkdatabuffer */ + OPJ_UINT32 cblkdatabuffersize; } opj_t1_t; /** @name Exported functions */ diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c index 5d5e33ea..c8cfcc26 100644 --- a/src/lib/openjp2/t2.c +++ b/src/lib/openjp2/t2.c @@ -1287,25 +1287,26 @@ static OPJ_BOOL opj_t2_read_packet_data(opj_t2_t* p_t2, #endif /* USE_JPWL */ - if (l_seg->numchunks == l_seg->numchunksalloc) { - OPJ_UINT32 l_numchunksalloc = l_seg->numchunksalloc * 2 + 1; + if (l_cblk->numchunks == l_cblk->numchunksalloc) { + OPJ_UINT32 l_numchunksalloc = l_cblk->numchunksalloc * 2 + 1; opj_tcd_seg_data_chunk_t* l_chunks = - (opj_tcd_seg_data_chunk_t*)opj_realloc(l_seg->chunks, + (opj_tcd_seg_data_chunk_t*)opj_realloc(l_cblk->chunks, l_numchunksalloc * sizeof(opj_tcd_seg_data_chunk_t)); if (l_chunks == NULL) { opj_event_msg(p_manager, EVT_ERROR, "cannot allocate opj_tcd_seg_data_chunk_t* array"); return OPJ_FALSE; } - l_seg->chunks = l_chunks; - l_seg->numchunksalloc = l_numchunksalloc; + l_cblk->chunks = l_chunks; + l_cblk->numchunksalloc = l_numchunksalloc; } - l_seg->chunks[l_seg->numchunks].data = l_current_data; - l_seg->chunks[l_seg->numchunks].len = l_seg->newlen; - l_seg->numchunks ++; + l_cblk->chunks[l_cblk->numchunks].data = l_current_data; + l_cblk->chunks[l_cblk->numchunks].len = l_seg->newlen; + l_cblk->numchunks ++; l_current_data += l_seg->newlen; + l_seg->len += l_seg->newlen; l_seg->numpasses += l_seg->numnewpasses; l_cblk->numnewpasses -= l_seg->numnewpasses; diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 4c4839d0..dd9faced 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -1213,11 +1213,7 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t * void opj_tcd_reinit_segment(opj_tcd_seg_t* seg) { - opj_tcd_seg_data_chunk_t *l_chunks = seg->chunks; - OPJ_UINT32 l_numchunksalloc = seg->numchunksalloc; memset(seg,
0, sizeof(opj_tcd_seg_t)); - seg->chunks = l_chunks; - seg->numchunksalloc = l_numchunksalloc; } /** @@ -1241,6 +1237,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * /* sanitize */ opj_tcd_seg_t * l_segs = p_code_block->segs; OPJ_UINT32 l_current_max_segs = p_code_block->m_current_max_segs; + opj_tcd_seg_data_chunk_t* l_chunks = p_code_block->chunks; + OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc; OPJ_UINT32 i; memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t)); @@ -1249,6 +1247,8 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t * for (i = 0; i < l_current_max_segs; ++i) { opj_tcd_reinit_segment(&l_segs[i]); } + p_code_block->chunks = l_chunks; + p_code_block->numchunksalloc = l_numchunksalloc; } return OPJ_TRUE; @@ -1952,14 +1952,15 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct) for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) { if (l_code_block->segs) { - OPJ_UINT32 i; - for (i = 0; i < l_code_block->m_current_max_segs; ++ i) { - opj_free(l_code_block->segs[i].chunks); - } opj_free(l_code_block->segs); l_code_block->segs = 00; } + if (l_code_block->chunks) { + opj_free(l_code_block->chunks); + l_code_block->chunks = 00; + } + ++l_code_block; } diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index 0de888d5..29cd8829 100644 --- a/src/lib/openjp2/tcd.h +++ b/src/lib/openjp2/tcd.h @@ -89,12 +89,12 @@ typedef struct opj_tcd_cblk_enc { } opj_tcd_cblk_enc_t; -/** Chunk of codestream data that is part of a T1 segment */ +/** Chunk of codestream data that is part of a code block */ typedef struct opj_tcd_seg_data_chunk { - OPJ_BYTE * - data; /* Point to tilepart buffer. We don't make a copy ! - So the tilepart buffer must be kept alive - as long as we need to decode the codeblocks */ + /* Point to tilepart buffer. We don't make a copy ! 
+ So the tilepart buffer must be kept alive + as long as we need to decode the codeblocks */ + OPJ_BYTE * data; OPJ_UINT32 len; /* Usable length of data */ } opj_tcd_seg_data_chunk_t; @@ -102,35 +102,37 @@ typedef struct opj_tcd_seg_data_chunk { * A segment represent a number of consecutive coding passes, without termination * of MQC or RAW between them. */ typedef struct opj_tcd_seg { - opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */ - OPJ_UINT32 numchunks; /* Number of valid chunks items */ - OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */ - OPJ_UINT32 - numpasses; /* Number of passes decoded. Including those that we skip */ - OPJ_UINT32 - real_num_passes; /* Number of passes actually to be decoded. To be used for code-block decoding */ - OPJ_UINT32 maxpasses; /* Maximum number of passes for this segment */ - OPJ_UINT32 - numnewpasses; /* Number of new passes for current packed. Transitory value */ - OPJ_UINT32 - newlen; /* Codestream length for this segment for current packed. Transitory value */ + OPJ_UINT32 len; /* Size of data related to this segment */ + /* Number of passes decoded. Including those that we skip */ + OPJ_UINT32 numpasses; + /* Number of passes actually to be decoded. To be used for code-block decoding */ + OPJ_UINT32 real_num_passes; + /* Maximum number of passes for this segment */ + OPJ_UINT32 maxpasses; + /* Number of new passes for current packet. Transitory value */ + OPJ_UINT32 numnewpasses; + /* Codestream length for this segment for current packet.
Transitory value */ + OPJ_UINT32 newlen; } opj_tcd_seg_t; /* Code-block for decoding */ typedef struct opj_tcd_cblk_dec { opj_tcd_seg_t* segs; /* segments information */ - OPJ_INT32 x0, y0, x1, - y1; /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */ + opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */ + /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */ + OPJ_INT32 x0, y0, x1, y1; OPJ_UINT32 numbps; - OPJ_UINT32 - numlenbits; /* number of bits for len,, for the current packet. Transitory value */ - OPJ_UINT32 - numnewpasses; /* number of pass added to the code-blocks, for the current packet. Transitory value */ - OPJ_UINT32 - numsegs; /* number of segments, including those of packet we skip */ - OPJ_UINT32 - real_num_segs; /* number of segments, to be used for code block decoding */ + /* number of bits for len, for the current packet. Transitory value */ + OPJ_UINT32 numlenbits; + /* number of pass added to the code-blocks, for the current packet. Transitory value */ + OPJ_UINT32 numnewpasses; + /* number of segments, including those of packet we skip */ + OPJ_UINT32 numsegs; + /* number of segments, to be used for code block decoding */ + OPJ_UINT32 real_num_segs; OPJ_UINT32 m_current_max_segs; /* allocated number of segs[] items */ + OPJ_UINT32 numchunks; /* Number of valid chunks items */ + OPJ_UINT32 numchunksalloc; /* Number of chunks item allocated */ } opj_tcd_cblk_dec_t; /** @@ -397,7 +399,7 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec); */ OPJ_BOOL opj_tcd_is_band_empty(opj_tcd_band_t* band); -/** Reinitialize a segment, without deallocating its chunks array */ +/** Reinitialize a segment */ void opj_tcd_reinit_segment(opj_tcd_seg_t* seg); /* ----------------------------------------------------------------------- */