Use thread-pool for T1 decoding

This commit is contained in:
Even Rouault 2016-05-25 16:38:44 +02:00
parent d4b7f03cfa
commit 5fbb8b2645
5 changed files with 191 additions and 94 deletions

View File

@ -183,6 +183,7 @@ static INLINE long opj_lrintf(float f) {
#include "cio.h"
#include "thread.h"
#include "tls_keys.h"
#include "image.h"
#include "invert.h"

View File

@ -1540,13 +1540,140 @@ void opj_t1_destroy(opj_t1_t *p_t1)
opj_free(p_t1);
}
OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1,
opj_tcd_tilecomp_t* tilec,
opj_tccp_t* tccp
)
/**
 * Work item describing the decoding of a single code-block.
 *
 * One instance is allocated per code-block by opj_t1_decode_cblks() and handed
 * to opj_t1_clbl_decode_processor() through opj_thread_pool_submit_job(); the
 * processor releases it with opj_free() on every exit path.
 */
typedef struct
{
/* Index of the resolution the code-block belongs to; the processor uses
   resolutions[resno - 1] to offset bands whose bandno has bit 0 or 1 set. */
OPJ_UINT32 resno;
/* Code-block to decode (passed to opj_t1_decode_cblk()). */
opj_tcd_cblk_dec_t* cblk;
/* Band containing the code-block; its bandno, x0, y0 and stepsize are read. */
opj_tcd_band_t* band;
/* Tile component whose data buffer receives the decoded coefficients. */
opj_tcd_tilecomp_t* tilec;
/* Coding parameters; roishift, cblksty and qmfbid are read by the processor. */
opj_tccp_t* tccp;
/* Shared status flag: the processor does nothing if it is already OPJ_FALSE
   and sets it to OPJ_FALSE when decoding the code-block fails. */
volatile OPJ_BOOL* pret;
} opj_t1_cblk_decode_processing_job_t;
static void opj_t1_destroy_wrapper(void* t1)
{
opj_t1_destroy( (opj_t1_t*) t1 );
}
/**
 * Thread-pool job: decodes one code-block and copies its coefficients into
 * the tile-component buffer.
 *
 * Steps:
 *  - return immediately if the shared status flag is already OPJ_FALSE;
 *  - fetch the opj_t1_t stored under OPJ_TLS_KEY_T1, creating and registering
 *    one if the slot is empty;
 *  - decode the code-block with opj_t1_decode_cblk();
 *  - apply the ROI down-shift when tccp->roishift is non-zero;
 *  - store the result in tilec->data, as integers halved when
 *    tccp->qmfbid == 1, otherwise as floats scaled by band->stepsize.
 *
 * Failures (T1 handle allocation or code-block decoding) are reported by
 * writing OPJ_FALSE to *job->pret.
 *
 * @param user_data opj_t1_cblk_decode_processing_job_t* describing the work;
 *                  it is released with opj_free() on every exit path.
 * @param tls       storage queried and updated through opj_tls_get() /
 *                  opj_tls_set() to reuse the opj_t1_t between jobs.
 */
static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
{
    opj_tcd_cblk_dec_t* cblk;
    opj_tcd_band_t* band;
    opj_tcd_tilecomp_t* tilec;
    opj_tccp_t* tccp;
    OPJ_INT32* restrict datap;
    OPJ_UINT32 cblk_w, cblk_h;
    OPJ_INT32 x, y;
    OPJ_UINT32 i, j;
    opj_t1_cblk_decode_processing_job_t* job;
    opj_t1_t* t1;
    OPJ_UINT32 resno;
    OPJ_UINT32 tile_w;

    job = (opj_t1_cblk_decode_processing_job_t*) user_data;
    resno = job->resno;
    cblk = job->cblk;
    band = job->band;
    tilec = job->tilec;
    tccp = job->tccp;
    tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);

    /* A previous job already reported a failure: skip the work. */
    if (!*(job->pret)) {
        opj_free(job);
        return;
    }

    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
    if (t1 == NULL) {
        t1 = opj_t1_create(OPJ_FALSE);
        if (t1 == NULL) {
            /* Allocation failed: report it instead of dereferencing NULL
               below. */
            *(job->pret) = OPJ_FALSE;
            opj_free(job);
            return;
        }
        /* NOTE(review): the result of opj_tls_set() is ignored here; if it
           can fail, t1 would not be cached (and presumably never destroyed).
           Confirm against the opj_tls_set() contract. */
        opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
    }

    if (OPJ_FALSE == opj_t1_decode_cblk(
                t1,
                cblk,
                band->bandno,
                (OPJ_UINT32)tccp->roishift,
                tccp->cblksty)) {
        *(job->pret) = OPJ_FALSE;
        opj_free(job);
        return;
    }

    /* Position of the code-block relative to its band ... */
    x = cblk->x0 - band->x0;
    y = cblk->y0 - band->y0;
    /* ... shifted by the extent of the previous resolution when bit 0
       (horizontal) or bit 1 (vertical) of bandno is set.
       NOTE(review): assumes resno >= 1 whenever one of these bits is set. */
    if (band->bandno & 1) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        x += pres->x1 - pres->x0;
    }
    if (band->bandno & 2) {
        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
        y += pres->y1 - pres->y0;
    }

    datap = t1->data;
    cblk_w = t1->w;
    cblk_h = t1->h;

    if (tccp->roishift) {
        /* Coefficients whose magnitude reaches 1 << roishift are shifted
           back down by roishift, keeping their sign. */
        OPJ_INT32 thresh = 1 << tccp->roishift;
        for (j = 0; j < cblk_h; ++j) {
            for (i = 0; i < cblk_w; ++i) {
                OPJ_INT32 val = datap[(j * cblk_w) + i];
                OPJ_INT32 mag = abs(val);
                if (mag >= thresh) {
                    mag >>= tccp->roishift;
                    datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
                }
            }
        }
    }

    if (tccp->qmfbid == 1) {
        /* Integer path: each coefficient is stored divided by 2. */
        OPJ_INT32* restrict tiledp =
            &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
        for (j = 0; j < cblk_h; ++j) {
            i = 0;
            /* Four coefficients per iteration, then the remaining ones. */
            for (; i < (cblk_w & ~3); i += 4) {
                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i];
                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1];
                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2];
                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3];
                tiledp[(j * tile_w) + i] = tmp0 / 2;
                tiledp[(j * tile_w) + i + 1] = tmp1 / 2;
                tiledp[(j * tile_w) + i + 2] = tmp2 / 2;
                tiledp[(j * tile_w) + i + 3] = tmp3 / 2;
            }
            for (; i < cblk_w; ++i) {
                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
                tiledp[(j * tile_w) + i] = tmp / 2;
            }
        }
    } else { /* if (tccp->qmfbid == 0) */
        /* Float path: the tile buffer is written as OPJ_FLOAT32, each
           coefficient being multiplied by the band step size. */
        OPJ_FLOAT32* restrict tiledp =
            (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
        for (j = 0; j < cblk_h; ++j) {
            OPJ_FLOAT32* restrict tiledp2 = tiledp;
            for (i = 0; i < cblk_w; ++i) {
                OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize;
                *tiledp2 = tmp;
                datap++;
                tiledp2++;
            }
            tiledp += tile_w;
        }
    }

    opj_free(job);
}
void opj_t1_decode_cblks( opj_thread_pool_t* tp,
volatile OPJ_BOOL* pret,
opj_tcd_tilecomp_t* tilec,
opj_tccp_t* tccp
)
{
OPJ_UINT32 resno, bandno, precno, cblkno;
OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
opj_tcd_resolution_t* res = &tilec->resolutions[resno];
@ -1559,85 +1686,24 @@ OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1,
for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
OPJ_INT32* restrict datap;
OPJ_UINT32 cblk_w, cblk_h;
OPJ_INT32 x, y;
OPJ_UINT32 i, j;
opj_t1_cblk_decode_processing_job_t* job;
if (OPJ_FALSE == opj_t1_decode_cblk(
t1,
cblk,
band->bandno,
(OPJ_UINT32)tccp->roishift,
tccp->cblksty)) {
return OPJ_FALSE;
}
x = cblk->x0 - band->x0;
y = cblk->y0 - band->y0;
if (band->bandno & 1) {
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
x += pres->x1 - pres->x0;
}
if (band->bandno & 2) {
opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
y += pres->y1 - pres->y0;
}
datap=t1->data;
cblk_w = t1->w;
cblk_h = t1->h;
if (tccp->roishift) {
OPJ_INT32 thresh = 1 << tccp->roishift;
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
OPJ_INT32 val = datap[(j * cblk_w) + i];
OPJ_INT32 mag = abs(val);
if (mag >= thresh) {
mag >>= tccp->roishift;
datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
}
}
}
}
if (tccp->qmfbid == 1) {
OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
for (j = 0; j < cblk_h; ++j) {
i = 0;
for (; i < (cblk_w & ~3); i += 4) {
OPJ_INT32 tmp0 = datap[(j * cblk_w) + i];
OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1];
OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2];
OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3];
((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2;
((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2;
((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2;
((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2;
}
for (; i < cblk_w; ++i) {
OPJ_INT32 tmp = datap[(j * cblk_w) + i];
((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2;
}
}
} else { /* if (tccp->qmfbid == 0) */
OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
for (j = 0; j < cblk_h; ++j) {
OPJ_FLOAT32* restrict tiledp2 = tiledp;
for (i = 0; i < cblk_w; ++i) {
OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize;
*tiledp2 = tmp;
datap++;
tiledp2++;
}
tiledp += tile_w;
}
}
job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, sizeof(opj_t1_cblk_decode_processing_job_t));
job->resno = resno;
job->cblk = cblk;
job->band = band;
job->tilec = tilec;
job->tccp = tccp;
job->pret = pret;
opj_thread_pool_submit_job( tp, opj_t1_clbl_decode_processor, job );
if( !(*pret) )
return;
} /* cblkno */
} /* precno */
} /* bandno */
} /* resno */
return OPJ_TRUE;
return;
}

View File

@ -172,7 +172,8 @@ Decode the code-blocks of a tile
@param tilec The tile to decode
@param tccp Tile coding parameters
*/
OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1,
void opj_t1_decode_cblks( opj_thread_pool_t* tp,
volatile OPJ_BOOL* pret,
opj_tcd_tilecomp_t* tilec,
opj_tccp_t* tccp);

View File

@ -1568,30 +1568,22 @@ static OPJ_BOOL opj_tcd_t2_decode (opj_tcd_t *p_tcd,
static OPJ_BOOL opj_tcd_t1_decode ( opj_tcd_t *p_tcd )
{
OPJ_UINT32 compno;
opj_t1_t * l_t1;
opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
opj_tcd_tilecomp_t* l_tile_comp = l_tile->comps;
opj_tccp_t * l_tccp = p_tcd->tcp->tccps;
l_t1 = opj_t1_create(OPJ_FALSE);
if (l_t1 == 00) {
return OPJ_FALSE;
}
volatile OPJ_BOOL ret = OPJ_TRUE;
for (compno = 0; compno < l_tile->numcomps; ++compno) {
/* The +3 is headroom required by the vectorized DWT */
if (OPJ_FALSE == opj_t1_decode_cblks(l_t1, l_tile_comp, l_tccp)) {
opj_t1_destroy(l_t1);
return OPJ_FALSE;
}
opj_t1_decode_cblks(p_tcd->thread_pool, &ret, l_tile_comp, l_tccp);
if( !ret )
break;
++l_tile_comp;
++l_tccp;
}
opj_t1_destroy(l_t1);
opj_thread_pool_wait_completion(p_tcd->thread_pool, 0);
return OPJ_TRUE;
return ret;
}

View File

@ -0,0 +1,37 @@
/*
* The copyright in this software is being made available under the 2-clauses
* BSD License, included below. This software may be subject to other third
* party and contributor rights, including patent rights, and no such rights
* are granted under this license.
*
* Copyright (c) 2016, Even Rouault
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TLS_KEYS_H
#define TLS_KEYS_H
/* Key passed to opj_tls_get() / opj_tls_set() to retrieve or store the
   opj_t1_t handle reused by the code-block decoding jobs. */
#define OPJ_TLS_KEY_T1 0
#endif