Significantly speed up rate allocation by rate/distortion ratio
- Avoid doing 128 iterations all the time, and stop when the threshold doesn't vary much. - Avoid calling the costly opj_t2_encode_packets() repeatedly when bisecting the layer ratio if the truncation points haven't changed since the last iteration. When used with the GDAL gdal_translate application to convert an 11977 x 8745 raster with data type UInt16 and 8 channels, the conversion time to JPEG2000 with 20 quality layers using disto/rate allocation ( -co "IC=C8" -co "JPEG2000_DRIVER=JP2OPENJPEG" -co "PROFILE=NPJE_NUMERICALLY_LOSSLESS" creation options of the GDAL NITF driver) goes from 5m56 wall clock (8m20s total, 12 vCPUs) down to 1m16 wall clock (3m45 total).
This commit is contained in:
parent
e9fc08a52a
commit
3d9bcd3753
|
@ -42,6 +42,8 @@
|
||||||
#include "opj_includes.h"
|
#include "opj_includes.h"
|
||||||
#include "opj_common.h"
|
#include "opj_common.h"
|
||||||
|
|
||||||
|
// #define DEBUG_RATE_ALLOC
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------- */
|
||||||
|
|
||||||
/* TODO MSD: */
|
/* TODO MSD: */
|
||||||
|
@ -143,6 +145,9 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
|
||||||
*/
|
*/
|
||||||
static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct);
|
static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct);
|
||||||
|
|
||||||
|
static
|
||||||
|
void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
|
||||||
|
OPJ_UINT32 final);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Free the memory allocated for encoding
|
Free the memory allocated for encoding
|
||||||
|
@ -224,6 +229,7 @@ opj_tcd_t* opj_tcd_create(OPJ_BOOL p_is_decoder)
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static
|
||||||
void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd)
|
void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd)
|
||||||
{
|
{
|
||||||
OPJ_UINT32 layno;
|
OPJ_UINT32 layno;
|
||||||
|
@ -234,15 +240,21 @@ void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void opj_tcd_makelayer(opj_tcd_t *tcd,
|
/* ----------------------------------------------------------------------- */
|
||||||
OPJ_UINT32 layno,
|
|
||||||
OPJ_FLOAT64 thresh,
|
/** Returns OPJ_TRUE if the layer allocation is unchanged w.r.t. the previous
|
||||||
OPJ_UINT32 final)
|
* invocation with a different threshold */
|
||||||
|
static
|
||||||
|
OPJ_BOOL opj_tcd_makelayer(opj_tcd_t *tcd,
|
||||||
|
OPJ_UINT32 layno,
|
||||||
|
OPJ_FLOAT64 thresh,
|
||||||
|
OPJ_UINT32 final)
|
||||||
{
|
{
|
||||||
OPJ_UINT32 compno, resno, bandno, precno, cblkno;
|
OPJ_UINT32 compno, resno, bandno, precno, cblkno;
|
||||||
OPJ_UINT32 passno;
|
OPJ_UINT32 passno;
|
||||||
|
|
||||||
opj_tcd_tile_t *tcd_tile = tcd->tcd_image->tiles;
|
opj_tcd_tile_t *tcd_tile = tcd->tcd_image->tiles;
|
||||||
|
OPJ_BOOL layer_allocation_is_same = OPJ_TRUE;
|
||||||
|
|
||||||
tcd_tile->distolayer[layno] = 0; /* fixed_quality */
|
tcd_tile->distolayer[layno] = 0; /* fixed_quality */
|
||||||
|
|
||||||
|
@ -304,7 +316,10 @@ void opj_tcd_makelayer(opj_tcd_t *tcd,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
layer->numpasses = n - cblk->numpassesinlayers;
|
if (layer->numpasses != n - cblk->numpassesinlayers) {
|
||||||
|
layer_allocation_is_same = OPJ_FALSE;
|
||||||
|
layer->numpasses = n - cblk->numpassesinlayers;
|
||||||
|
}
|
||||||
|
|
||||||
if (!layer->numpasses) {
|
if (!layer->numpasses) {
|
||||||
layer->disto = 0;
|
layer->disto = 0;
|
||||||
|
@ -333,8 +348,10 @@ void opj_tcd_makelayer(opj_tcd_t *tcd,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return layer_allocation_is_same;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
|
void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
|
||||||
OPJ_UINT32 final)
|
OPJ_UINT32 final)
|
||||||
{
|
{
|
||||||
|
@ -440,6 +457,11 @@ void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Rate allocation for the following methods:
|
||||||
|
* - allocation by rate/distortion (m_disto_alloc == 1)
|
||||||
|
* - allocation by fixed quality (m_fixed_quality == 1)
|
||||||
|
*/
|
||||||
|
static
|
||||||
OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
|
OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
|
||||||
OPJ_BYTE *dest,
|
OPJ_BYTE *dest,
|
||||||
OPJ_UINT32 * p_data_written,
|
OPJ_UINT32 * p_data_written,
|
||||||
|
@ -561,6 +583,7 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
|
||||||
(tcd_tcp->distoratio[layno] > 0.0))) {
|
(tcd_tcp->distoratio[layno] > 0.0))) {
|
||||||
opj_t2_t*t2 = opj_t2_create(tcd->image, cp);
|
opj_t2_t*t2 = opj_t2_create(tcd->image, cp);
|
||||||
OPJ_FLOAT64 thresh = 0;
|
OPJ_FLOAT64 thresh = 0;
|
||||||
|
OPJ_BOOL last_layer_allocation_ok = OPJ_FALSE;
|
||||||
|
|
||||||
if (t2 == 00) {
|
if (t2 == 00) {
|
||||||
return OPJ_FALSE;
|
return OPJ_FALSE;
|
||||||
|
@ -568,11 +591,27 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
|
||||||
|
|
||||||
for (i = 0; i < 128; ++i) {
|
for (i = 0; i < 128; ++i) {
|
||||||
OPJ_FLOAT64 distoachieved = 0; /* fixed_quality */
|
OPJ_FLOAT64 distoachieved = 0; /* fixed_quality */
|
||||||
|
OPJ_BOOL layer_allocation_is_same;
|
||||||
|
|
||||||
thresh = (lo + hi) / 2;
|
OPJ_FLOAT64 new_thresh = (lo + hi) / 2;
|
||||||
|
/* Stop iterating when the threshold has stabilized enough */
|
||||||
opj_tcd_makelayer(tcd, layno, thresh, 0);
|
/* 0.5 * 1e-5 is somewhat arbitrary, but has been selected */
|
||||||
|
/* so that this doesn't change the results of the regression */
|
||||||
|
/* test suite. */
|
||||||
|
if (fabs(new_thresh - thresh) <= 0.5 * 1e-5 * thresh) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
thresh = new_thresh;
|
||||||
|
#ifdef DEBUG_RATE_ALLOC
|
||||||
|
opj_event_msg(p_manager, EVT_INFO, "layno=%u, iter=%u, thresh=%g",
|
||||||
|
layno, i, new_thresh);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
layer_allocation_is_same = opj_tcd_makelayer(tcd, layno, thresh, 0) && i != 0;
|
||||||
|
#ifdef DEBUG_RATE_ALLOC
|
||||||
|
opj_event_msg(p_manager, EVT_INFO, "--> layer_allocation_is_same = %d",
|
||||||
|
layer_allocation_is_same);
|
||||||
|
#endif
|
||||||
if (cp->m_specific_param.m_enc.m_fixed_quality) { /* fixed_quality */
|
if (cp->m_specific_param.m_enc.m_fixed_quality) { /* fixed_quality */
|
||||||
if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) {
|
if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) {
|
||||||
if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
|
if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
|
||||||
|
@ -605,17 +644,41 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
|
||||||
}
|
}
|
||||||
lo = thresh;
|
lo = thresh;
|
||||||
}
|
}
|
||||||
} else {
|
} else { /* Disto/rate based optimization */
|
||||||
if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
|
/* Check if the layer allocation done by opj_tcd_makelayer()
|
||||||
p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
|
* is compatible of the maximum rate allocation. If not,
|
||||||
tcd->cur_pino,
|
* retry with a higher threshold.
|
||||||
THRESH_CALC, p_manager)) {
|
* If OK, try with a lower threshold.
|
||||||
/* TODO: what to do with l ??? seek / tell ??? */
|
* Call opj_t2_encode_packets() only if opj_tcd_makelayer()
|
||||||
/* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */
|
* has resulted in different truncation points since its last
|
||||||
|
* call. */
|
||||||
|
if ((layer_allocation_is_same && !last_layer_allocation_ok) ||
|
||||||
|
(!layer_allocation_is_same &&
|
||||||
|
! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
|
||||||
|
p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
|
||||||
|
tcd->cur_pino,
|
||||||
|
THRESH_CALC, p_manager))) {
|
||||||
|
|
||||||
|
#ifdef DEBUG_RATE_ALLOC
|
||||||
|
if (!layer_allocation_is_same) {
|
||||||
|
opj_event_msg(p_manager, EVT_INFO,
|
||||||
|
"--> check rate alloc failed (> maxlen=%u)\n", maxlen);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
last_layer_allocation_ok = OPJ_FALSE;
|
||||||
lo = thresh;
|
lo = thresh;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG_RATE_ALLOC
|
||||||
|
if (!layer_allocation_is_same) {
|
||||||
|
opj_event_msg(p_manager, EVT_INFO,
|
||||||
|
"--> check rate alloc success (len=%u <= maxlen=%u)\n", *p_data_written,
|
||||||
|
maxlen);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
last_layer_allocation_ok = OPJ_TRUE;
|
||||||
hi = thresh;
|
hi = thresh;
|
||||||
stable_thresh = thresh;
|
stable_thresh = thresh;
|
||||||
}
|
}
|
||||||
|
|
|
@ -369,23 +369,6 @@ OPJ_BOOL opj_tcd_init(opj_tcd_t *p_tcd,
|
||||||
OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
|
OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
|
||||||
opj_event_mgr_t* p_manager);
|
opj_event_mgr_t* p_manager);
|
||||||
|
|
||||||
void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
|
|
||||||
OPJ_UINT32 final);
|
|
||||||
|
|
||||||
void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd);
|
|
||||||
|
|
||||||
void opj_tcd_makelayer(opj_tcd_t *tcd,
|
|
||||||
OPJ_UINT32 layno,
|
|
||||||
OPJ_FLOAT64 thresh,
|
|
||||||
OPJ_UINT32 final);
|
|
||||||
|
|
||||||
OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
|
|
||||||
OPJ_BYTE *dest,
|
|
||||||
OPJ_UINT32 * p_data_written,
|
|
||||||
OPJ_UINT32 len,
|
|
||||||
opj_codestream_info_t *cstr_info,
|
|
||||||
opj_event_mgr_t *p_manager);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the maximum tile size that will be taken by the tile once decoded.
|
* Gets the maximum tile size that will be taken by the tile once decoded.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue