Tiny performance improvement in the T1 stage for subtile decoding

This commit is contained in:
Even Rouault 2017-09-01 16:30:58 +02:00
parent 18635df518
commit ccac773556
2 changed files with 14 additions and 27 deletions

View File

@ -1674,32 +1674,18 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
assert((cblk->decoded_data != NULL) || (tilec->data != NULL)); assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
if (cblk->decoded_data) { if (cblk->decoded_data) {
OPJ_UINT32 cblk_size = cblk_w * cblk_h;
if (tccp->qmfbid == 1) { if (tccp->qmfbid == 1) {
for (j = 0; j < cblk_h; ++j) { for (i = 0; i < cblk_size; ++i) {
i = 0; datap[i] /= 2;
for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
datap[(j * cblk_w) + i + 0U] = tmp0 / 2;
datap[(j * cblk_w) + i + 1U] = tmp1 / 2;
datap[(j * cblk_w) + i + 2U] = tmp2 / 2;
datap[(j * cblk_w) + i + 3U] = tmp3 / 2;
}
for (; i < cblk_w; ++i) {
datap[(j * cblk_w) + i] /= 2;
}
} }
} else { /* if (tccp->qmfbid == 0) */ } else { /* if (tccp->qmfbid == 0) */
for (j = 0; j < cblk_h; ++j) { for (i = 0; i < cblk_size; ++i) {
for (i = 0; i < cblk_w; ++i) {
OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
memcpy(datap, &tmp, sizeof(tmp)); memcpy(datap, &tmp, sizeof(tmp));
datap++; datap++;
} }
} }
}
} else if (tccp->qmfbid == 1) { } else if (tccp->qmfbid == 1) {
OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w + OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w +
(size_t)x]; (size_t)x];
@ -1775,7 +1761,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n", printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
cblk->x0, cblk->y0, resno, bandno); cblk->x0, cblk->y0, resno, bandno);
#endif #endif
opj_free(cblk->decoded_data); opj_aligned_free(cblk->decoded_data);
cblk->decoded_data = NULL; cblk->decoded_data = NULL;
} }
} }
@ -1799,7 +1785,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n", printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
cblk->x0, cblk->y0, resno, bandno); cblk->x0, cblk->y0, resno, bandno);
#endif #endif
opj_free(cblk->decoded_data); opj_aligned_free(cblk->decoded_data);
cblk->decoded_data = NULL; cblk->decoded_data = NULL;
} }
continue; continue;
@ -1823,7 +1809,7 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
cblk->x0, cblk->y0, resno, bandno); cblk->x0, cblk->y0, resno, bandno);
#endif #endif
/* Zero-init required */ /* Zero-init required */
cblk->decoded_data = opj_calloc(1, cblk_w * cblk_h * sizeof(OPJ_INT32)); cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32));
if (cblk->decoded_data == NULL) { if (cblk->decoded_data == NULL) {
if (p_manager_mutex) { if (p_manager_mutex) {
opj_mutex_lock(p_manager_mutex); opj_mutex_lock(p_manager_mutex);
@ -1836,10 +1822,11 @@ void opj_t1_decode_cblks(opj_tcd_t* tcd,
*pret = OPJ_FALSE; *pret = OPJ_FALSE;
return; return;
} }
memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32));
} else if (cblk->decoded_data) { } else if (cblk->decoded_data) {
/* Not sure if that code path can happen, but better be */ /* Not sure if that code path can happen, but better be */
/* safe than sorry */ /* safe than sorry */
opj_free(cblk->decoded_data); opj_aligned_free(cblk->decoded_data);
cblk->decoded_data = NULL; cblk->decoded_data = NULL;
} }

View File

@ -1262,7 +1262,7 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc; OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc;
OPJ_UINT32 i; OPJ_UINT32 i;
opj_free(p_code_block->decoded_data); opj_aligned_free(p_code_block->decoded_data);
p_code_block->decoded_data = 00; p_code_block->decoded_data = 00;
memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t)); memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t));
@ -2177,7 +2177,7 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct)
l_code_block->chunks = 00; l_code_block->chunks = 00;
} }
opj_free(l_code_block->decoded_data); opj_aligned_free(l_code_block->decoded_data);
l_code_block->decoded_data = NULL; l_code_block->decoded_data = NULL;
++l_code_block; ++l_code_block;