From 5fbb8b2645a085391b070162d8551aa960caab6a Mon Sep 17 00:00:00 2001
From: Even Rouault
Date: Wed, 25 May 2016 16:38:44 +0200
Subject: [PATCH] Use thread-pool for T1 decoding

---
  Review notes (this section is not part of the commit):
  * The worker now checks the result of opj_t1_create() and
    opj_t1_decode_cblks() checks the result of opj_calloc(); on failure the
    shared result flag is set to OPJ_FALSE instead of dereferencing NULL.
  * Line breaks and indentation were reconstructed from a copy whose
    whitespace had been collapsed. If context does not match the target
    tree, apply with whitespace ignored (git apply --ignore-whitespace).
  * The commit id above and the blob ids on the index lines come from the
    original submission and do not account for the added checks.

 src/lib/openjp2/opj_includes.h |   1 +
 src/lib/openjp2/t1.c           | 242 ++++++++++++++++++++++-----------
 src/lib/openjp2/t1.h           |   3 +-
 src/lib/openjp2/tcd.c          |  20 +--
 src/lib/openjp2/tls_keys.h     |  37 ++++++
 5 files changed, 209 insertions(+), 94 deletions(-)
 create mode 100644 src/lib/openjp2/tls_keys.h

diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h
index c2cc31fa..e835fae4 100644
--- a/src/lib/openjp2/opj_includes.h
+++ b/src/lib/openjp2/opj_includes.h
@@ -183,6 +183,7 @@ static INLINE long opj_lrintf(float f) {
 #include "cio.h"
 
 #include "thread.h"
+#include "tls_keys.h"
 
 #include "image.h"
 #include "invert.h"
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index e1097bf5..adf64bb4 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -1540,13 +1540,153 @@ void opj_t1_destroy(opj_t1_t *p_t1)
     opj_free(p_t1);
 }
 
-OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1,
-                              opj_tcd_tilecomp_t* tilec,
-                              opj_tccp_t* tccp
-                              )
+/** Job descriptor for decoding one code-block, submitted to the thread pool. */
+typedef struct
+{
+    OPJ_UINT32 resno;           /* index of the resolution owning the band */
+    opj_tcd_cblk_dec_t* cblk;   /* code-block to decode */
+    opj_tcd_band_t* band;       /* band containing the code-block */
+    opj_tcd_tilecomp_t* tilec;  /* tile-component receiving the samples */
+    opj_tccp_t* tccp;           /* coding parameters of the component */
+    volatile OPJ_BOOL* pret;    /* shared flag, set to OPJ_FALSE on failure */
+} opj_t1_cblk_decode_processing_job_t;
+
+/** void* adapter so that opj_t1_destroy() can be handed to opj_tls_set(). */
+static void opj_t1_destroy_wrapper(void* t1)
+{
+    opj_t1_destroy( (opj_t1_t*) t1 );
+}
+
+/**
+ * Thread-pool job: decodes the code-block described by user_data and copies
+ * it into the tile-component buffer. Frees user_data on every return path;
+ * does nothing if *pret is already OPJ_FALSE and clears *pret on failure.
+ */
+static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
+{
+    opj_tcd_cblk_dec_t* cblk;
+    opj_tcd_band_t* band;
+    opj_tcd_tilecomp_t* tilec;
+    opj_tccp_t* tccp;
+    OPJ_INT32* restrict datap;
+    OPJ_UINT32 cblk_w, cblk_h;
+    OPJ_INT32 x, y;
+    OPJ_UINT32 i, j;
+    opj_t1_cblk_decode_processing_job_t* job;
+    opj_t1_t* t1;
+    OPJ_UINT32 resno;
+    OPJ_UINT32 tile_w;
+
+    job = (opj_t1_cblk_decode_processing_job_t*) user_data;
+    resno = job->resno;
+    cblk = job->cblk;
+    band = job->band;
+    tilec = job->tilec;
+    tccp = job->tccp;
+    tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+
+    if( !*(job->pret) )
+    {
+        opj_free(job);
+        return;
+    }
+
+    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
+    if( t1 == NULL )
+    {
+        t1 = opj_t1_create( OPJ_FALSE );
+        if( t1 == NULL ) /* allocation failed: report it, never decode with NULL */
+        {
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
+        opj_tls_set( tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper );
+    }
+
+    if (OPJ_FALSE == opj_t1_decode_cblk(
+            t1,
+            cblk,
+            band->bandno,
+            (OPJ_UINT32)tccp->roishift,
+            tccp->cblksty)) {
+        *(job->pret) = OPJ_FALSE;
+        opj_free(job);
+        return;
+    }
+
+    x = cblk->x0 - band->x0;
+    y = cblk->y0 - band->y0;
+    if (band->bandno & 1) {
+        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
+        x += pres->x1 - pres->x0;
+    }
+    if (band->bandno & 2) {
+        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
+        y += pres->y1 - pres->y0;
+    }
+
+    datap=t1->data;
+    cblk_w = t1->w;
+    cblk_h = t1->h;
+
+    if (tccp->roishift) {
+        OPJ_INT32 thresh = 1 << tccp->roishift;
+        for (j = 0; j < cblk_h; ++j) {
+            for (i = 0; i < cblk_w; ++i) {
+                OPJ_INT32 val = datap[(j * cblk_w) + i];
+                OPJ_INT32 mag = abs(val);
+                if (mag >= thresh) {
+                    mag >>= tccp->roishift;
+                    datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
+                }
+            }
+        }
+    }
+    if (tccp->qmfbid == 1) {
+        OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+        for (j = 0; j < cblk_h; ++j) {
+            i = 0;
+            for (; i < (cblk_w & ~3); i += 4) {
+                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i];
+                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1];
+                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2];
+                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3];
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2;
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2;
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2;
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2;
+            }
+            for (; i < cblk_w; ++i) {
+                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2;
+            }
+        }
+    } else { /* if (tccp->qmfbid == 0) */
+        OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+        for (j = 0; j < cblk_h; ++j) {
+            OPJ_FLOAT32* restrict tiledp2 = tiledp;
+            for (i = 0; i < cblk_w; ++i) {
+                OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize;
+                *tiledp2 = tmp;
+                datap++;
+                tiledp2++;
+            }
+            tiledp += tile_w;
+        }
+    }
+
+    opj_free(job);
+}
+
+
+void opj_t1_decode_cblks( opj_thread_pool_t* tp,
+                          volatile OPJ_BOOL* pret,
+                          opj_tcd_tilecomp_t* tilec,
+                          opj_tccp_t* tccp
+                          )
 {
     OPJ_UINT32 resno, bandno, precno, cblkno;
-    OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
 
     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
@@ -1559,85 +1699,29 @@ OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1,
 
                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
-                    OPJ_INT32* restrict datap;
-                    OPJ_UINT32 cblk_w, cblk_h;
-                    OPJ_INT32 x, y;
-                    OPJ_UINT32 i, j;
+                    opj_t1_cblk_decode_processing_job_t* job;
 
-                    if (OPJ_FALSE == opj_t1_decode_cblk(
-                            t1,
-                            cblk,
-                            band->bandno,
-                            (OPJ_UINT32)tccp->roishift,
-                            tccp->cblksty)) {
-                        return OPJ_FALSE;
-                    }
-
-                    x = cblk->x0 - band->x0;
-                    y = cblk->y0 - band->y0;
-                    if (band->bandno & 1) {
-                        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
-                        x += pres->x1 - pres->x0;
-                    }
-                    if (band->bandno & 2) {
-                        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
-                        y += pres->y1 - pres->y0;
-                    }
-
-                    datap=t1->data;
-                    cblk_w = t1->w;
-                    cblk_h = t1->h;
-
-                    if (tccp->roishift) {
-                        OPJ_INT32 thresh = 1 << tccp->roishift;
-                        for (j = 0; j < cblk_h; ++j) {
-                            for (i = 0; i < cblk_w; ++i) {
-                                OPJ_INT32 val = datap[(j * cblk_w) + i];
-                                OPJ_INT32 mag = abs(val);
-                                if (mag >= thresh) {
-                                    mag >>= tccp->roishift;
-                                    datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
-                                }
-                            }
-                        }
-                    }
-                    if (tccp->qmfbid == 1) {
-                        OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
-                        for (j = 0; j < cblk_h; ++j) {
-                            i = 0;
-                            for (; i < (cblk_w & ~3); i += 4) {
-                                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i];
-                                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1];
-                                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2];
-                                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3];
-                                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2;
-                                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2;
-                                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2;
-                                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2;
-                            }
-                            for (; i < cblk_w; ++i) {
-                                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2;
-                            }
-                        }
-                    } else { /* if (tccp->qmfbid == 0) */
-                        OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
-                        for (j = 0; j < cblk_h; ++j) {
-                            OPJ_FLOAT32* restrict tiledp2 = tiledp;
-                            for (i = 0; i < cblk_w; ++i) {
-                                OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize;
-                                *tiledp2 = tmp;
-                                datap++;
-                                tiledp2++;
-                            }
-                            tiledp += tile_w;
-                        }
-                    }
+                    job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, sizeof(opj_t1_cblk_decode_processing_job_t));
+                    if( !job ) /* out of memory: flag the failure and stop queuing */
+                    {
+                        *pret = OPJ_FALSE;
+                        return;
+                    }
+                    job->resno = resno;
+                    job->cblk = cblk;
+                    job->band = band;
+                    job->tilec = tilec;
+                    job->tccp = tccp;
+                    job->pret = pret;
+                    opj_thread_pool_submit_job( tp, opj_t1_clbl_decode_processor, job );
+                    if( !(*pret) )
+                        return;
                 } /* cblkno */
             } /* precno */
         } /* bandno */
     } /* resno */
-    return OPJ_TRUE;
+
+    return;
 }
 
 
diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h
index 22557d96..5afc6490 100644
--- a/src/lib/openjp2/t1.h
+++ b/src/lib/openjp2/t1.h
@@ -172,7 +172,8 @@ Decode the code-blocks of a tile
 @param tilec The tile to decode
 @param tccp Tile coding parameters
 */
-OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1,
+void opj_t1_decode_cblks( opj_thread_pool_t* tp,
+                          volatile OPJ_BOOL* pret,
                           opj_tcd_tilecomp_t* tilec,
                           opj_tccp_t* tccp);
 
diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c
index d76a3f9d..a34fa18c 100644
--- a/src/lib/openjp2/tcd.c
+++ b/src/lib/openjp2/tcd.c
@@ -1568,30 +1568,22 @@ static OPJ_BOOL opj_tcd_t2_decode (opj_tcd_t *p_tcd,
 static OPJ_BOOL opj_tcd_t1_decode ( opj_tcd_t *p_tcd )
 {
     OPJ_UINT32 compno;
-    opj_t1_t * l_t1;
     opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
     opj_tcd_tilecomp_t* l_tile_comp = l_tile->comps;
     opj_tccp_t * l_tccp = p_tcd->tcp->tccps;
-
-
-    l_t1 = opj_t1_create(OPJ_FALSE);
-    if (l_t1 == 00) {
-        return OPJ_FALSE;
-    }
+    volatile OPJ_BOOL ret = OPJ_TRUE;
 
     for (compno = 0; compno < l_tile->numcomps; ++compno) {
-        /* The +3 is headroom required by the vectorized DWT */
-        if (OPJ_FALSE == opj_t1_decode_cblks(l_t1, l_tile_comp, l_tccp)) {
-            opj_t1_destroy(l_t1);
-            return OPJ_FALSE;
-        }
+        opj_t1_decode_cblks(p_tcd->thread_pool, &ret, l_tile_comp, l_tccp);
+        if( !ret )
+            break;
         ++l_tile_comp;
         ++l_tccp;
     }
 
-    opj_t1_destroy(l_t1);
+    opj_thread_pool_wait_completion(p_tcd->thread_pool, 0);
 
-    return OPJ_TRUE;
+    return ret;
 }
 
 
diff --git a/src/lib/openjp2/tls_keys.h b/src/lib/openjp2/tls_keys.h
new file mode 100644
index 00000000..fb26498d
--- /dev/null
+++ b/src/lib/openjp2/tls_keys.h
@@ -0,0 +1,37 @@
+/*
+ * The copyright in this software is being made available under the 2-clauses
+ * BSD License, included below. This software may be subject to other third
+ * party and contributor rights, including patent rights, and no such rights
+ * are granted under this license.
+ *
+ * Copyright (c) 2016, Even Rouault
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TLS_KEYS_H
+#define TLS_KEYS_H
+
+#define OPJ_TLS_KEY_T1 0
+
+#endif