opj_dwt_decode_partial_97(): simplify and make more efficient use of sparse arrays in the vertical pass

This commit is contained in:
Even Rouault 2017-09-01 16:31:06 +02:00
parent ae19001ba4
commit 83b5a168ec
1 changed file with 41 additions and 53 deletions

View File

@ -2248,24 +2248,21 @@ static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
OPJ_UINT32 sa_col, OPJ_UINT32 sa_col,
OPJ_UINT32 nb_elts_read) OPJ_UINT32 nb_elts_read)
{ {
OPJ_UINT32 i;
for (i = 0; i < nb_elts_read; i++) {
OPJ_BOOL ret; OPJ_BOOL ret;
ret = opj_sparse_array_int32_read(sa, ret = opj_sparse_array_int32_read(sa,
sa_col + i, dwt->win_l_x0, sa_col, dwt->win_l_x0,
sa_col + i + 1, dwt->win_l_x1, sa_col + nb_elts_read, dwt->win_l_x1,
(OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i, (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0),
0, 8, OPJ_TRUE); 1, 8, OPJ_TRUE);
assert(ret); assert(ret);
ret = opj_sparse_array_int32_read(sa, ret = opj_sparse_array_int32_read(sa,
sa_col + i, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_col, (OPJ_UINT32)dwt->sn + dwt->win_h_x0,
sa_col + i + 1, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, sa_col + nb_elts_read, (OPJ_UINT32)dwt->sn + dwt->win_h_x1,
(OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i, (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0),
0, 8, OPJ_TRUE); 1, 8, OPJ_TRUE);
assert(ret); assert(ret);
OPJ_UNUSED(ret); OPJ_UNUSED(ret);
} }
}
#ifdef __SSE__ #ifdef __SSE__
@ -2743,15 +2740,13 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
if ((j + 3 >= win_ll_y0 && j < win_ll_y1) || if ((j + 3 >= win_ll_y0 && j < win_ll_y1) ||
(j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn &&
j < win_lh_y1 + (OPJ_UINT32)v.sn)) { j < win_lh_y1 + (OPJ_UINT32)v.sn)) {
OPJ_UINT32 k;
opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j)); opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j));
opj_v4dwt_decode(&h); opj_v4dwt_decode(&h);
for (k = 0; k < 4; k++) {
if (!opj_sparse_array_int32_write(sa, if (!opj_sparse_array_int32_write(sa,
win_tr_x0, j + k, win_tr_x0, j,
win_tr_x1, j + k + 1, win_tr_x1, j + 4,
(OPJ_INT32*)&h.wavelet[win_tr_x0].f[k], (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
4, 0, OPJ_TRUE)) { 4, 1, OPJ_TRUE)) {
/* FIXME event manager error callback */ /* FIXME event manager error callback */
opj_sparse_array_int32_free(sa); opj_sparse_array_int32_free(sa);
opj_aligned_free(h.wavelet); opj_aligned_free(h.wavelet);
@ -2759,28 +2754,24 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
} }
} }
} }
}
if (j < rh && if (j < rh &&
((j + 3 >= win_ll_y0 && j < win_ll_y1) || ((j + 3 >= win_ll_y0 && j < win_ll_y1) ||
(j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn &&
j < win_lh_y1 + (OPJ_UINT32)v.sn))) { j < win_lh_y1 + (OPJ_UINT32)v.sn))) {
OPJ_UINT32 k;
opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j); opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j);
opj_v4dwt_decode(&h); opj_v4dwt_decode(&h);
for (k = 0; k < rh - j; k++) {
if (!opj_sparse_array_int32_write(sa, if (!opj_sparse_array_int32_write(sa,
win_tr_x0, j + k, win_tr_x0, j,
win_tr_x1, j + k + 1, win_tr_x1, rh,
(OPJ_INT32*)&h.wavelet[win_tr_x0].f[k], (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
4, 0, OPJ_TRUE)) { 4, 1, OPJ_TRUE)) {
/* FIXME event manager error callback */ /* FIXME event manager error callback */
opj_sparse_array_int32_free(sa); opj_sparse_array_int32_free(sa);
opj_aligned_free(h.wavelet); opj_aligned_free(h.wavelet);
return OPJ_FALSE; return OPJ_FALSE;
} }
} }
}
v.win_l_x0 = win_ll_y0; v.win_l_x0 = win_ll_y0;
v.win_l_x1 = win_ll_y1; v.win_l_x1 = win_ll_y1;
@ -2788,17 +2779,15 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
v.win_h_x1 = win_lh_y1; v.win_h_x1 = win_lh_y1;
for (j = win_tr_x0; j < win_tr_x1; j += 4) { for (j = win_tr_x0; j < win_tr_x1; j += 4) {
OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j); OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j);
OPJ_UINT32 k;
opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts); opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts);
opj_v4dwt_decode(&v); opj_v4dwt_decode(&v);
for (k = 0; k < nb_elts; k++) {
if (!opj_sparse_array_int32_write(sa, if (!opj_sparse_array_int32_write(sa,
j + k, win_tr_y0, j, win_tr_y0,
j + k + 1, win_tr_y1, j + nb_elts, win_tr_y1,
(OPJ_INT32*)&h.wavelet[win_tr_y0].f[k], (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0],
0, 4, OPJ_TRUE)) { 1, 4, OPJ_TRUE)) {
/* FIXME event manager error callback */ /* FIXME event manager error callback */
opj_sparse_array_int32_free(sa); opj_sparse_array_int32_free(sa);
opj_aligned_free(h.wavelet); opj_aligned_free(h.wavelet);
@ -2806,7 +2795,6 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
} }
} }
} }
}
{ {
OPJ_BOOL ret = opj_sparse_array_int32_read(sa, OPJ_BOOL ret = opj_sparse_array_int32_read(sa,