Forward DWT: small code refactoring to allow future improvements for the vertical pass

This commit is contained in:
Even Rouault 2020-05-22 15:58:47 +02:00
parent 33d3d0de07
commit e69fa09f60
No known key found for this signature in database
GPG Key ID: 33EBBFC47B3DD87D
1 changed file with 108 additions and 37 deletions

View File

@ -157,10 +157,18 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
opj_tcd_tilecomp_t* tilec, opj_tcd_tilecomp_t* tilec,
OPJ_UINT32 numres); OPJ_UINT32 numres);
/* Forward transform, for the vertical pass, processing cols columns */
/* where cols <= NB_ELTS_V8 */
/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */ /* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
typedef void (*opj_encode_one_row_fnptr_type)(void *, OPJ_INT32, OPJ_INT32, typedef void (*opj_encode_and_deinterleave_v_fnptr_type)(
OPJ_INT32); void *array,
void *tmp,
OPJ_UINT32 height,
OPJ_BOOL even,
OPJ_UINT32 stride_width,
OPJ_UINT32 cols);
/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)( typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)(
void *row, void *row,
void *tmp, void *tmp,
@ -169,7 +177,7 @@ typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)(
static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
opj_tcd_tilecomp_t * tilec, opj_tcd_tilecomp_t * tilec,
opj_encode_one_row_fnptr_type p_function, opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
opj_encode_and_deinterleave_h_one_row_fnptr_type opj_encode_and_deinterleave_h_one_row_fnptr_type
p_encode_and_deinterleave_h_one_row); p_encode_and_deinterleave_h_one_row);
@ -1226,7 +1234,7 @@ typedef struct {
OPJ_INT32 * OPJ_RESTRICT tiledp; OPJ_INT32 * OPJ_RESTRICT tiledp;
OPJ_UINT32 min_j; OPJ_UINT32 min_j;
OPJ_UINT32 max_j; OPJ_UINT32 max_j;
opj_encode_one_row_fnptr_type p_function; opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v;
} opj_dwt_encode_v_job_t; } opj_dwt_encode_v_job_t;
static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls) static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls)
@ -1236,29 +1244,90 @@ static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls)
(void)tls; (void)tls;
job = (opj_dwt_encode_v_job_t*)user_data; job = (opj_dwt_encode_v_job_t*)user_data;
for (j = job->min_j; j < job->max_j; j++) { for (j = job->min_j; j + NB_ELTS_V8 - 1 < job->max_j; j += NB_ELTS_V8) {
OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + j; (*job->p_encode_and_deinterleave_v)(job->tiledp + j,
OPJ_UINT32 k; job->v.mem,
for (k = 0; k < job->rh; ++k) { job->rh,
job->v.mem[k] = aj[k * job->w]; job->v.cas == 0,
} job->w,
NB_ELTS_V8);
(*job->p_function)(job->v.mem, job->v.dn, job->v.sn, job->v.cas); }
if (j < job->max_j) {
opj_dwt_deinterleave_v(job->v.mem, aj, job->v.dn, job->v.sn, job->w, (*job->p_encode_and_deinterleave_v)(job->tiledp + j,
job->v.cas); job->v.mem,
job->rh,
job->v.cas == 0,
job->w,
job->max_j - j);
} }
opj_aligned_free(job->v.mem); opj_aligned_free(job->v.mem);
opj_free(job); opj_free(job);
} }
/* Vertical pass of the forward 5-3 transform over a strip of columns. */
/* arrayIn      : OPJ_INT32 samples; column c starts at arrayIn + c and */
/*                successive rows are stride_width elements apart. */
/* tmpIn        : OPJ_INT32 scratch buffer; this function itself fills */
/*                entries [0, height) with the column being processed. */
/* height       : number of rows in each column. */
/* even         : non-zero selects parity 0, zero selects parity 1 for the */
/*                1-D helpers. */
/* stride_width : distance, in elements, between two consecutive rows. */
/* cols         : number of columns to process, cols <= NB_ELTS_V8. */
static void opj_dwt_encode_and_deinterleave_v(
    void *arrayIn,
    void *tmpIn,
    OPJ_UINT32 height,
    OPJ_BOOL even,
    OPJ_UINT32 stride_width,
    OPJ_UINT32 cols)
{
    OPJ_INT32* OPJ_RESTRICT samples = (OPJ_INT32 * OPJ_RESTRICT)arrayIn;
    OPJ_INT32* OPJ_RESTRICT column = (OPJ_INT32 * OPJ_RESTRICT)tmpIn;
    /* Parity argument handed to the 1-D helpers: 0 when even, 1 otherwise */
    const OPJ_INT32 cas = even ? 0 : 1;
    /* sn is half of height, rounded up only when even is set; */
    /* dn is the remainder. */
    const OPJ_INT32 sn = (OPJ_INT32)((height + (OPJ_UINT32)(1 - cas)) >> 1);
    const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn);
    OPJ_UINT32 col = 0;

    while (col < cols) {
        OPJ_UINT32 row;

        /* Gather the strided column into the contiguous scratch buffer */
        for (row = 0; row < height; ++row) {
            column[row] = samples[col + row * stride_width];
        }

        /* Transform the gathered column, then hand it to the */
        /* de-interleave helper with the column start as destination. */
        opj_dwt_encode_1(column, dn, sn, cas);
        opj_dwt_deinterleave_v(column, samples + col, dn, sn, stride_width, cas);

        ++col;
    }
}
/* Vertical pass of the forward 9-7 transform over a strip of columns. */
/* arrayIn      : OPJ_FLOAT32 samples; column c starts at arrayIn + c and */
/*                successive rows are stride_width elements apart. */
/* tmpIn        : OPJ_FLOAT32 scratch buffer; this function itself fills */
/*                entries [0, height) with the column being processed. */
/* height       : number of rows in each column. */
/* even         : non-zero selects parity 0, zero selects parity 1 for the */
/*                1-D helpers. */
/* stride_width : distance, in elements, between two consecutive rows. */
/* cols         : number of columns to process, cols <= NB_ELTS_V8. */
static void opj_dwt_encode_and_deinterleave_v_real(
    void *arrayIn,
    void *tmpIn,
    OPJ_UINT32 height,
    OPJ_BOOL even,
    OPJ_UINT32 stride_width,
    OPJ_UINT32 cols)
{
    OPJ_FLOAT32* OPJ_RESTRICT samples = (OPJ_FLOAT32 * OPJ_RESTRICT)arrayIn;
    OPJ_FLOAT32* OPJ_RESTRICT column = (OPJ_FLOAT32 * OPJ_RESTRICT)tmpIn;
    /* Parity argument handed to the 1-D helpers: 0 when even, 1 otherwise */
    const OPJ_INT32 cas = even ? 0 : 1;
    /* sn is half of height, rounded up only when even is set; */
    /* dn is the remainder. */
    const OPJ_INT32 sn = (OPJ_INT32)((height + (OPJ_UINT32)(1 - cas)) >> 1);
    const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn);
    OPJ_UINT32 col = 0;

    while (col < cols) {
        OPJ_UINT32 row;

        /* Gather the strided column into the contiguous scratch buffer */
        for (row = 0; row < height; ++row) {
            column[row] = samples[col + row * stride_width];
        }

        opj_dwt_encode_1_real(column, dn, sn, cas);

        /* The de-interleave helper is called through OPJ_INT32 pointers, */
        /* so both buffers are passed reinterpreted from the void* inputs. */
        /* NOTE(review): presumably relies on OPJ_FLOAT32 and OPJ_INT32 */
        /* having the same size -- confirm. */
        opj_dwt_deinterleave_v((OPJ_INT32*)tmpIn,
                               ((OPJ_INT32*)(arrayIn)) + col,
                               dn, sn, stride_width, cas);

        ++col;
    }
}
/* <summary> */ /* <summary> */
/* Forward 5-3 wavelet transform in 2-D. */ /* Forward 5-3 wavelet transform in 2-D. */
/* </summary> */ /* </summary> */
static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
opj_tcd_tilecomp_t * tilec, opj_tcd_tilecomp_t * tilec,
opj_encode_one_row_fnptr_type p_function, opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
opj_encode_and_deinterleave_h_one_row_fnptr_type opj_encode_and_deinterleave_h_one_row_fnptr_type
p_encode_and_deinterleave_h_one_row) p_encode_and_deinterleave_h_one_row)
{ {
@ -1282,11 +1351,11 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions); l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
/* overflow check */ /* overflow check */
if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) { if (l_data_size > (SIZE_MAX / (NB_ELTS_V8 * sizeof(OPJ_INT32)))) {
/* FIXME event manager error callback */ /* FIXME event manager error callback */
return OPJ_FALSE; return OPJ_FALSE;
} }
l_data_size *= sizeof(OPJ_INT32); l_data_size *= NB_ELTS_V8 * sizeof(OPJ_INT32);
bj = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); bj = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
/* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */ /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */
/* in that case, so do not error out */ /* in that case, so do not error out */
@ -1319,17 +1388,22 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
dn = (OPJ_INT32)(rh - rh1); dn = (OPJ_INT32)(rh - rh1);
/* Perform vertical pass */ /* Perform vertical pass */
if (num_threads <= 1 || rw <= 1) { if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) {
for (j = 0; j < rw; ++j) { for (j = 0; j + NB_ELTS_V8 - 1 < rw; j += NB_ELTS_V8) {
OPJ_INT32* OPJ_RESTRICT aj = tiledp + j; p_encode_and_deinterleave_v(tiledp + j,
OPJ_UINT32 k; bj,
for (k = 0; k < rh; ++k) { rh,
bj[k] = aj[k * w]; cas_col == 0,
} w,
NB_ELTS_V8);
(*p_function)(bj, dn, sn, cas_col); }
if (j < rw) {
opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); p_encode_and_deinterleave_v(tiledp + j,
bj,
rh,
cas_col == 0,
w,
rw - j);
} }
} else { } else {
OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
@ -1338,7 +1412,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
if (rw < num_jobs) { if (rw < num_jobs) {
num_jobs = rw; num_jobs = rw;
} }
step_j = (rw / num_jobs); step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
for (j = 0; j < num_jobs; j++) { for (j = 0; j < num_jobs; j++) {
opj_dwt_encode_v_job_t* job; opj_dwt_encode_v_job_t* job;
@ -1363,11 +1437,8 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
job->w = w; job->w = w;
job->tiledp = tiledp; job->tiledp = tiledp;
job->min_j = j * step_j; job->min_j = j * step_j;
job->max_j = (j + 1U) * step_j; /* this can overflow */ job->max_j = (j + 1 == num_jobs) ? rw : (j + 1) * step_j;
if (j == (num_jobs - 1U)) { /* this will take care of the overflow */ job->p_encode_and_deinterleave_v = p_encode_and_deinterleave_v;
job->max_j = rw;
}
job->p_function = p_function;
opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job); opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job);
} }
opj_thread_pool_wait_completion(tp, 0); opj_thread_pool_wait_completion(tp, 0);
@ -1440,7 +1511,7 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd,
opj_tcd_tilecomp_t * tilec) opj_tcd_tilecomp_t * tilec)
{ {
return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
opj_dwt_encode_1, opj_dwt_encode_and_deinterleave_v,
opj_dwt_encode_and_deinterleave_h_one_row); opj_dwt_encode_and_deinterleave_h_one_row);
} }
@ -1480,7 +1551,7 @@ OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd,
opj_tcd_tilecomp_t * tilec) opj_tcd_tilecomp_t * tilec)
{ {
return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
opj_dwt_encode_1_real, opj_dwt_encode_and_deinterleave_v_real,
opj_dwt_encode_and_deinterleave_h_one_row_real); opj_dwt_encode_and_deinterleave_h_one_row_real);
} }