From 426bf8d337715f7b2e867cb2643128e5c2e3b5bb Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Sun, 27 Dec 2015 20:14:47 +0100 Subject: [PATCH 01/33] Move some MQC functions into a header for speed Allow these hot functions to be inlined. This boosts decode performance by ~10%. --- src/lib/openjp2/CMakeLists.txt | 1 + src/lib/openjp2/mqc.c | 117 ------------------------ src/lib/openjp2/mqc.h | 4 +- src/lib/openjp2/mqc_inl.h | 159 +++++++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 118 deletions(-) create mode 100644 src/lib/openjp2/mqc_inl.h diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt index 367a7a8d..c02a9948 100644 --- a/src/lib/openjp2/CMakeLists.txt +++ b/src/lib/openjp2/CMakeLists.txt @@ -29,6 +29,7 @@ set(OPENJPEG_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/mct.h ${CMAKE_CURRENT_SOURCE_DIR}/mqc.c ${CMAKE_CURRENT_SOURCE_DIR}/mqc.h + ${CMAKE_CURRENT_SOURCE_DIR}/mqc_inl.h ${CMAKE_CURRENT_SOURCE_DIR}/openjpeg.c ${CMAKE_CURRENT_SOURCE_DIR}/openjpeg.h ${CMAKE_CURRENT_SOURCE_DIR}/opj_clock.c diff --git a/src/lib/openjp2/mqc.c b/src/lib/openjp2/mqc.c index 4e409a7c..7119c3a5 100644 --- a/src/lib/openjp2/mqc.c +++ b/src/lib/openjp2/mqc.c @@ -70,28 +70,6 @@ Fill mqc->c with 1's for flushing @param mqc MQC handle */ static void opj_mqc_setbits(opj_mqc_t *mqc); -/** -FIXME DOC -@param mqc MQC handle -@return -*/ -static INLINE OPJ_INT32 opj_mqc_mpsexchange(opj_mqc_t *const mqc); -/** -FIXME DOC -@param mqc MQC handle -@return -*/ -static INLINE OPJ_INT32 opj_mqc_lpsexchange(opj_mqc_t *const mqc); -/** -Input a byte -@param mqc MQC handle -*/ -static INLINE void opj_mqc_bytein(opj_mqc_t *const mqc); -/** -Renormalize mqc->a and mqc->c while decoding -@param mqc MQC handle -*/ -static INLINE void opj_mqc_renormd(opj_mqc_t *const mqc); /*@}*/ /*@}*/ @@ -284,82 +262,6 @@ static void opj_mqc_setbits(opj_mqc_t *mqc) { } } -static INLINE OPJ_INT32 opj_mqc_mpsexchange(opj_mqc_t *const mqc) { - OPJ_INT32 d; - if (mqc->a < 
(*mqc->curctx)->qeval) { - d = (OPJ_INT32)(1 - (*mqc->curctx)->mps); - *mqc->curctx = (*mqc->curctx)->nlps; - } else { - d = (OPJ_INT32)(*mqc->curctx)->mps; - *mqc->curctx = (*mqc->curctx)->nmps; - } - - return d; -} - -static INLINE OPJ_INT32 opj_mqc_lpsexchange(opj_mqc_t *const mqc) { - OPJ_INT32 d; - if (mqc->a < (*mqc->curctx)->qeval) { - mqc->a = (*mqc->curctx)->qeval; - d = (OPJ_INT32)(*mqc->curctx)->mps; - *mqc->curctx = (*mqc->curctx)->nmps; - } else { - mqc->a = (*mqc->curctx)->qeval; - d = (OPJ_INT32)(1 - (*mqc->curctx)->mps); - *mqc->curctx = (*mqc->curctx)->nlps; - } - - return d; -} - -#ifdef MQC_PERF_OPT -static INLINE void opj_mqc_bytein(opj_mqc_t *const mqc) { - unsigned int i = *((unsigned int *) mqc->bp); - mqc->c += i & 0xffff00; - mqc->ct = i & 0x0f; - mqc->bp += (i >> 2) & 0x04; -} -#else -static void opj_mqc_bytein(opj_mqc_t *const mqc) { - if (mqc->bp != mqc->end) { - OPJ_UINT32 c; - if (mqc->bp + 1 != mqc->end) { - c = *(mqc->bp + 1); - } else { - c = 0xff; - } - if (*mqc->bp == 0xff) { - if (c > 0x8f) { - mqc->c += 0xff00; - mqc->ct = 8; - } else { - mqc->bp++; - mqc->c += c << 9; - mqc->ct = 7; - } - } else { - mqc->bp++; - mqc->c += c << 8; - mqc->ct = 8; - } - } else { - mqc->c += 0xff00; - mqc->ct = 8; - } -} -#endif - -static INLINE void opj_mqc_renormd(opj_mqc_t *const mqc) { - do { - if (mqc->ct == 0) { - opj_mqc_bytein(mqc); - } - mqc->a <<= 1; - mqc->c <<= 1; - mqc->ct--; - } while (mqc->a < 0x8000); -} - /* ========================================================== MQ-Coder interface @@ -585,25 +487,6 @@ OPJ_BOOL opj_mqc_init_dec(opj_mqc_t *mqc, OPJ_BYTE *bp, OPJ_UINT32 len) { return OPJ_TRUE; } -OPJ_INT32 opj_mqc_decode(opj_mqc_t *const mqc) { - OPJ_INT32 d; - mqc->a -= (*mqc->curctx)->qeval; - if ((mqc->c >> 16) < (*mqc->curctx)->qeval) { - d = opj_mqc_lpsexchange(mqc); - opj_mqc_renormd(mqc); - } else { - mqc->c -= (*mqc->curctx)->qeval << 16; - if ((mqc->a & 0x8000) == 0) { - d = opj_mqc_mpsexchange(mqc); - 
opj_mqc_renormd(mqc); - } else { - d = (OPJ_INT32)(*mqc->curctx)->mps; - } - } - - return d; -} - void opj_mqc_resetstates(opj_mqc_t *mqc) { OPJ_UINT32 i; for (i = 0; i < MQC_NUMCTXS; i++) { diff --git a/src/lib/openjp2/mqc.h b/src/lib/openjp2/mqc.h index 69a2d460..574c599b 100644 --- a/src/lib/openjp2/mqc.h +++ b/src/lib/openjp2/mqc.h @@ -82,6 +82,8 @@ typedef struct opj_mqc { #endif } opj_mqc_t; +#include "mqc_inl.h" + /** @name Exported functions */ /*@{*/ /* ----------------------------------------------------------------------- */ @@ -198,7 +200,7 @@ Decode a symbol @param mqc MQC handle @return Returns the decoded symbol (0 or 1) */ -OPJ_INT32 opj_mqc_decode(opj_mqc_t * const mqc); +static INLINE OPJ_INT32 opj_mqc_decode(opj_mqc_t * const mqc); /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/src/lib/openjp2/mqc_inl.h b/src/lib/openjp2/mqc_inl.h new file mode 100644 index 00000000..882b59f4 --- /dev/null +++ b/src/lib/openjp2/mqc_inl.h @@ -0,0 +1,159 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium + * Copyright (c) 2002-2014, Professor Benoit Macq + * Copyright (c) 2001-2003, David Janssens + * Copyright (c) 2002-2003, Yannick Verschueren + * Copyright (c) 2003-2007, Francois-Olivier Devaux + * Copyright (c) 2003-2014, Antonin Descampe + * Copyright (c) 2005, Herve Drolon, FreeImage Team + * Copyright (c) 2008, Jerome Fimes, Communications & Systemes + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __MQC_INL_H +#define __MQC_INL_H +/** +FIXME DOC +@param mqc MQC handle +@return +*/ +static INLINE OPJ_INT32 opj_mqc_mpsexchange(opj_mqc_t *const mqc) { + OPJ_INT32 d; + if (mqc->a < (*mqc->curctx)->qeval) { + d = (OPJ_INT32)(1 - (*mqc->curctx)->mps); + *mqc->curctx = (*mqc->curctx)->nlps; + } else { + d = (OPJ_INT32)(*mqc->curctx)->mps; + *mqc->curctx = (*mqc->curctx)->nmps; + } + + return d; +} + +/** +FIXME DOC +@param mqc MQC handle +@return +*/ +static INLINE OPJ_INT32 opj_mqc_lpsexchange(opj_mqc_t *const mqc) { + OPJ_INT32 d; + if (mqc->a < (*mqc->curctx)->qeval) { + mqc->a = (*mqc->curctx)->qeval; + d = (OPJ_INT32)(*mqc->curctx)->mps; + *mqc->curctx = (*mqc->curctx)->nmps; + } else { + mqc->a = (*mqc->curctx)->qeval; + d = (OPJ_INT32)(1 - (*mqc->curctx)->mps); + *mqc->curctx = (*mqc->curctx)->nlps; + } + + return d; +} + +/** +Input a byte +@param mqc MQC handle +*/ +#ifdef MQC_PERF_OPT +static INLINE void opj_mqc_bytein(opj_mqc_t *const mqc) { + unsigned int i = *((unsigned int *) mqc->bp); + mqc->c += i & 0xffff00; + mqc->ct = i & 0x0f; + mqc->bp += (i >> 2) & 0x04; +} +#else +static INLINE void opj_mqc_bytein(opj_mqc_t *const mqc) { + if (mqc->bp != mqc->end) { + OPJ_UINT32 c; + if (mqc->bp + 1 != mqc->end) { + c = *(mqc->bp + 1); + } else { + c = 0xff; + } + if (*mqc->bp == 0xff) { + if (c > 0x8f) { + mqc->c += 0xff00; + mqc->ct = 8; + } else { + mqc->bp++; + mqc->c += c << 9; + mqc->ct = 7; + } + } else { + mqc->bp++; + mqc->c += c << 8; + mqc->ct = 8; + } + } else { + mqc->c += 0xff00; + mqc->ct = 8; + } +} +#endif + +/** +Renormalize mqc->a and mqc->c while decoding +@param mqc MQC handle +*/ +static INLINE void opj_mqc_renormd(opj_mqc_t *const mqc) { + do { + if (mqc->ct == 0) { + opj_mqc_bytein(mqc); + } + mqc->a <<= 1; + mqc->c <<= 1; + mqc->ct--; + } while (mqc->a < 0x8000); +} + +/** +Decode a symbol +@param mqc MQC handle +@return Returns the decoded symbol (0 or 1) +*/ +static INLINE OPJ_INT32 opj_mqc_decode(opj_mqc_t *const 
mqc) { + OPJ_INT32 d; + mqc->a -= (*mqc->curctx)->qeval; + if ((mqc->c >> 16) < (*mqc->curctx)->qeval) { + d = opj_mqc_lpsexchange(mqc); + opj_mqc_renormd(mqc); + } else { + mqc->c -= (*mqc->curctx)->qeval << 16; + if ((mqc->a & 0x8000) == 0) { + d = opj_mqc_mpsexchange(mqc); + opj_mqc_renormd(mqc); + } else { + d = (OPJ_INT32)(*mqc->curctx)->mps; + } + } + + return d; +} + +#endif /* __MQC_INL_H */ From c539808d097945866c0f7120ccdea28921a011a2 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 21 May 2016 15:41:36 +0200 Subject: [PATCH 02/33] opj_t1_updateflags(): tiny optimization We can avoid using a look-up table with some shift arithmetic. --- src/lib/openjp2/t1.c | 19 +++++++++---------- src/lib/openjp2/t1.h | 2 ++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 1bf7205e..37fc4fc1 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -343,23 +343,22 @@ static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stri opj_flag_t *np = flagsp - stride; opj_flag_t *sp = flagsp + stride; - static const opj_flag_t mod[] = { - T1_SIG_S, T1_SIG_S|T1_SGN_S, - T1_SIG_E, T1_SIG_E|T1_SGN_E, - T1_SIG_W, T1_SIG_W|T1_SGN_W, - T1_SIG_N, T1_SIG_N|T1_SGN_N - }; + /* We strongly rely on (T1_SGN_N == 0x0100) == (T1_SIG_N == 0x0010) << 4 */ + /* and T1_SIG_E == T1_SIG_N << 1, T1_SIG_W == T1_SIG_N << 2 and T1_SIG_S == T1_SIG_N << 2 */ + /* and T1_SGN_E == T1_SGN_N << 1, T1_SGN_W == T1_SGN_N << 2 and T1_SGN_S == T1_SGN_N << 2 */ + + opj_flag_t flag_N = T1_SIG_N | (T1_SIG_N << (4 * s)); np[-1] |= T1_SIG_SE; - np[0] |= mod[s]; + np[0] |= flag_N << 2; np[1] |= T1_SIG_SW; - flagsp[-1] |= mod[s+2]; + flagsp[-1] |= flag_N << 1; flagsp[0] |= T1_SIG; - flagsp[1] |= mod[s+4]; + flagsp[1] |= flag_N << 3; sp[-1] |= T1_SIG_NE; - sp[0] |= mod[s+6]; + sp[0] |= flag_N; sp[1] |= T1_SIG_NW; } diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 3bc0ad9e..e9d3db57 100644 ---
a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -50,6 +50,8 @@ in T1.C are used by some function in TCD.C. /* ----------------------------------------------------------------------- */ #define T1_NMSEDEC_BITS 7 +/* CAUTION: the value of those constants must not be changed, otherwise the */ +/* optimization of opj_t1_updateflags() will break! */ #define T1_SIG_NE 0x0001 /**< Context orientation : North-East direction */ #define T1_SIG_SE 0x0002 /**< Context orientation : South-East direction */ #define T1_SIG_SW 0x0004 /**< Context orientation : South-West direction */ From d8fef96f23ea8b12226d7326118f2ffd91da28ac Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 21 May 2016 15:52:02 +0200 Subject: [PATCH 03/33] Improve code generation in opj_t1_dec_clnpass() Add a opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit() method that does the job of opj_t1_dec_clnpass_step_only() assuming the conditions are met. And use it in opj_t1_dec_clnpass(). The compiler generates more efficient code. --- src/lib/openjp2/t1.c | 47 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 37fc4fc1..124d68ea 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -906,6 +906,32 @@ static void opj_t1_dec_clnpass_step( *flagsp &= ~T1_VISIT; } /* VSC and BYPASS by Antonin */ +static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( + opj_t1_t *t1, + opj_flag_t *flagsp, + OPJ_INT32 *datap, + OPJ_INT32 orient, + OPJ_INT32 oneplushalf) +{ + OPJ_INT32 v; + OPJ_INT32 flag; + + opj_mqc_t *mqc = t1->mqc; /* MQC component */ + + flag = *flagsp; + /*if (!(flag & (T1_SIG | T1_VISIT)))*/ + { + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc((OPJ_UINT32)flag, (OPJ_UINT32)orient)); + if (opj_mqc_decode(mqc)) { + opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); + v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); + *datap = v ? 
-oneplushalf : oneplushalf; + opj_t1_updateflags(flagsp, v, t1->flags_stride); + } + } + /*flagsp &= ~T1_VISIT;*/ +} + static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -1084,17 +1110,30 @@ static void opj_t1_dec_clnpass( data2 += t1->w; } } else { + opj_flag_t flag; flags2 += t1->flags_stride; - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); + flag = *flags2; + if (!(flag & (T1_SIG | T1_VISIT))) + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); + *flags2 &= ~T1_VISIT; data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); + flag = *flags2; + if (!(flag & (T1_SIG | T1_VISIT))) + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); + *flags2 &= ~T1_VISIT; data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); + flag = *flags2; + if (!(flag & (T1_SIG | T1_VISIT))) + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); + *flags2 &= ~T1_VISIT; data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); + flag = *flags2; + if (!(flag & (T1_SIG | T1_VISIT))) + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); + *flags2 &= ~T1_VISIT; data2 += t1->w; } } From 23a01dfdef1a266754af268b07d912efbe04a759 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sat, 21 May 2016 16:27:35 +0200 Subject: [PATCH 04/33] Specialize decoding passes for 64x64 code blocks --- src/lib/openjp2/t1.c | 586 ++++++++++++++++++++++++------------------- 1 file changed, 334 insertions(+), 252 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 124d68ea..07e358a5 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -118,10 +118,6 @@ static void opj_t1_dec_sigpass_raw( OPJ_INT32 bpno, OPJ_INT32 orient, 
OPJ_INT32 cblksty); -static void opj_t1_dec_sigpass_mqc( - opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 orient); static void opj_t1_dec_sigpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno, @@ -158,9 +154,6 @@ static void opj_t1_dec_refpass_raw( opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 cblksty); -static void opj_t1_dec_refpass_mqc( - opj_t1_t *t1, - OPJ_INT32 bpno); static void opj_t1_dec_refpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno); @@ -536,49 +529,63 @@ static void opj_t1_dec_sigpass_raw( } } /* VSC and BYPASS by Antonin */ -static void opj_t1_dec_sigpass_mqc( +#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, orient, w, h, flags_stride) \ +{ \ + OPJ_INT32 one, half, oneplushalf; \ + OPJ_UINT32 i, j, k; \ + OPJ_INT32 *data1 = t1->data; \ + opj_flag_t *flags1 = &t1->flags[1]; \ + one = 1 << bpno; \ + half = one >> 1; \ + oneplushalf = one | half; \ + for (k = 0; k < (h & ~3u); k += 4) { \ + for (i = 0; i < w; ++i) { \ + OPJ_INT32 *data2 = data1 + i; \ + opj_flag_t *flags2 = flags1 + i; \ + flags2 += flags_stride; \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + data2 += w; \ + flags2 += flags_stride; \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + data2 += w; \ + flags2 += flags_stride; \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + data2 += w; \ + flags2 += flags_stride; \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + data2 += w; \ + } \ + data1 += w << 2; \ + flags1 += flags_stride << 2; \ + } \ + for (i = 0; i < w; ++i) { \ + OPJ_INT32 *data2 = data1 + i; \ + opj_flag_t *flags2 = flags1 + i; \ + for (j = k; j < h; ++j) { \ + flags2 += flags_stride; \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + data2 += w; \ + } \ + } \ +} + +static void opj_t1_dec_sigpass_mqc_64x64( opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 orient) { - OPJ_INT32 one, half, oneplushalf; - OPJ_UINT32 i, j, k; - OPJ_INT32 *data1 = t1->data; - opj_flag_t *flags1 
= &t1->flags[1]; - one = 1 << bpno; - half = one >> 1; - oneplushalf = one | half; - for (k = 0; k < (t1->h & ~3u); k += 4) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); - data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); - data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); - data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); - data2 += t1->w; - } - data1 += t1->w << 2; - flags1 += t1->flags_stride << 2; - } - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - for (j = k; j < t1->h; ++j) { - flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); - data2 += t1->w; - } - } -} /* VSC and BYPASS by Antonin */ + opj_t1_dec_sigpass_mqc_internal(t1, bpno, orient, 64, 64, 66); +} +static void opj_t1_dec_sigpass_mqc_generic( + opj_t1_t *t1, + OPJ_INT32 bpno, + OPJ_INT32 orient) +{ + opj_t1_dec_sigpass_mqc_internal(t1, bpno, orient, t1->w, t1->h, t1->flags_stride); +} + +/* VSC and BYPASS by Antonin */ static void opj_t1_dec_sigpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno, @@ -757,48 +764,61 @@ static void opj_t1_dec_refpass_raw( } } /* VSC and BYPASS by Antonin */ -static void opj_t1_dec_refpass_mqc( +#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \ +{ \ + OPJ_INT32 one, poshalf, neghalf; \ + OPJ_UINT32 i, j, k; \ + OPJ_INT32 *data1 = t1->data; \ + opj_flag_t *flags1 = &t1->flags[1]; \ + one = 1 << bpno; \ + poshalf = one >> 1; \ + neghalf = bpno > 0 ? 
-poshalf : -1; \ + for (k = 0; k < (h & ~3u); k += 4) { \ + for (i = 0; i < w; ++i) { \ + OPJ_INT32 *data2 = data1 + i; \ + opj_flag_t *flags2 = flags1 + i; \ + flags2 += flags_stride; \ + opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + data2 += w; \ + flags2 += flags_stride; \ + opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + data2 += w; \ + flags2 += flags_stride; \ + opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + data2 += w; \ + flags2 += flags_stride; \ + opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + data2 += w; \ + } \ + data1 += w << 2; \ + flags1 += flags_stride << 2; \ + } \ + for (i = 0; i < w; ++i) { \ + OPJ_INT32 *data2 = data1 + i; \ + opj_flag_t *flags2 = flags1 + i; \ + for (j = k; j < h; ++j) { \ + flags2 += flags_stride; \ + opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + data2 += w; \ + } \ + } \ +} + +static void opj_t1_dec_refpass_mqc_64x64( opj_t1_t *t1, OPJ_INT32 bpno) { - OPJ_INT32 one, poshalf, neghalf; - OPJ_UINT32 i, j, k; - OPJ_INT32 *data1 = t1->data; - opj_flag_t *flags1 = &t1->flags[1]; - one = 1 << bpno; - poshalf = one >> 1; - neghalf = bpno > 0 ? 
-poshalf : -1; - for (k = 0; k < (t1->h & ~3u); k += 4) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); - data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); - data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); - data2 += t1->w; - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); - data2 += t1->w; - } - data1 += t1->w << 2; - flags1 += t1->flags_stride << 2; - } - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - for (j = k; j < t1->h; ++j) { - flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); - data2 += t1->w; - } - } -} /* VSC and BYPASS by Antonin */ + opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66); +} +static void opj_t1_dec_refpass_mqc_generic( + opj_t1_t *t1, + OPJ_INT32 bpno) +{ + opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride); +} + +/* VSC and BYPASS by Antonin */ static void opj_t1_dec_refpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno) @@ -1028,143 +1048,159 @@ static void opj_t1_enc_clnpass( } } -static void opj_t1_dec_clnpass( +#define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)] + +#define opj_t1_dec_clnpass_internal(t1, bpno, orient, cblksty, w, h, flags_stride) \ +{ \ + OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \ + OPJ_UINT32 i, j, k; \ + OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \ + \ + opj_mqc_t *mqc = t1->mqc; /* MQC component */ \ + \ + one = 1 << bpno; \ + half = one >> 1; \ + oneplushalf = one | half; \ + if (cblksty & J2K_CCP_CBLKSTY_VSC) { \ + for (k = 0; k < h; k += 4) { \ + for (i = 0; i < w; ++i) { \ + if (k + 3 < h) { \ + agg = 
!(MACRO_t1_flags_internal(1 + k,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ + || MACRO_t1_flags_internal(1 + k + 1,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ + || MACRO_t1_flags_internal(1 + k + 2,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ + || (MACRO_t1_flags_internal(1 + k + 3,1 + i,flags_stride) \ + & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); \ + } else { \ + agg = 0; \ + } \ + if (agg) { \ + opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ + if (!opj_mqc_decode(mqc)) { \ + continue; \ + } \ + opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ + runlen = opj_mqc_decode(mqc); \ + runlen = (runlen << 1) | opj_mqc_decode(mqc); \ + } else { \ + runlen = 0; \ + } \ + for (j = k + (OPJ_UINT32)runlen; j < k + 4 && j < h; ++j) { \ + vsc = (j == k + 3 || j == h - 1) ? 1 : 0; \ + opj_t1_dec_clnpass_step_vsc( \ + t1, \ + &t1->flags[((j+1) * flags_stride) + i + 1], \ + &t1->data[(j * w) + i], \ + orient, \ + oneplushalf, \ + agg && (j == k + (OPJ_UINT32)runlen), \ + vsc); \ + } \ + } \ + } \ + } else { \ + OPJ_INT32 *data1 = t1->data; \ + opj_flag_t *flags1 = &t1->flags[1]; \ + for (k = 0; k < (h & ~3u); k += 4) { \ + for (i = 0; i < w; ++i) { \ + OPJ_INT32 *data2 = data1 + i; \ + opj_flag_t *flags2 = flags1 + i; \ + agg = !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \ + MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \ + MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \ + MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); \ + if (agg) { \ + opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ + if (!opj_mqc_decode(mqc)) { \ + continue; \ + } \ + opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ + runlen = opj_mqc_decode(mqc); \ + runlen = (runlen << 1) | opj_mqc_decode(mqc); \ + flags2 += (OPJ_UINT32)runlen * flags_stride; \ + data2 += (OPJ_UINT32)runlen * w; \ + for (j = (OPJ_UINT32)runlen; j < 4 && j < h; ++j) { \ + flags2 += flags_stride; \ + if (agg && (j 
== (OPJ_UINT32)runlen)) { \ + opj_t1_dec_clnpass_step_partial(t1, flags2, data2, orient, oneplushalf); \ + } else { \ + opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); \ + } \ + data2 += w; \ + } \ + } else { \ + opj_flag_t flag; \ + flags2 += flags_stride; \ + flag = *flags2; \ + if (!(flag & (T1_SIG | T1_VISIT))) \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + *flags2 &= ~T1_VISIT; \ + data2 += w; \ + flags2 += flags_stride; \ + flag = *flags2; \ + if (!(flag & (T1_SIG | T1_VISIT))) \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + *flags2 &= ~T1_VISIT; \ + data2 += w; \ + flags2 += flags_stride; \ + flag = *flags2; \ + if (!(flag & (T1_SIG | T1_VISIT))) \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + *flags2 &= ~T1_VISIT; \ + data2 += w; \ + flags2 += flags_stride; \ + flag = *flags2; \ + if (!(flag & (T1_SIG | T1_VISIT))) \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + *flags2 &= ~T1_VISIT; \ + data2 += w; \ + } \ + } \ + data1 += w << 2; \ + flags1 += flags_stride << 2; \ + } \ + for (i = 0; i < w; ++i) { \ + OPJ_INT32 *data2 = data1 + i; \ + opj_flag_t *flags2 = flags1 + i; \ + for (j = k; j < h; ++j) { \ + flags2 += flags_stride; \ + opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); \ + data2 += w; \ + } \ + } \ + } \ + \ + if (segsym) { \ + OPJ_INT32 v = 0; \ + opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \ + v = opj_mqc_decode(mqc); \ + v = (v << 1) | opj_mqc_decode(mqc); \ + v = (v << 1) | opj_mqc_decode(mqc); \ + v = (v << 1) | opj_mqc_decode(mqc); \ + /* \ + if (v!=0xa) { \ + opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); \ + } \ + */ \ + } \ +} /* VSC and BYPASS by Antonin */ + +static void opj_t1_dec_clnpass_64x64( opj_t1_t *t1, OPJ_INT32 bpno, OPJ_INT32 orient, OPJ_INT32 cblksty) { - OPJ_INT32 
one, half, oneplushalf, agg, runlen, vsc; - OPJ_UINT32 i, j, k; - OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; - - opj_mqc_t *mqc = t1->mqc; /* MQC component */ - - one = 1 << bpno; - half = one >> 1; - oneplushalf = one | half; - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - for (k = 0; k < t1->h; k += 4) { - for (i = 0; i < t1->w; ++i) { - if (k + 3 < t1->h) { - agg = !(MACRO_t1_flags(1 + k,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || MACRO_t1_flags(1 + k + 1,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || MACRO_t1_flags(1 + k + 2,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH) - || (MACRO_t1_flags(1 + k + 3,1 + i) - & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); - } else { - agg = 0; - } - if (agg) { - opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); - if (!opj_mqc_decode(mqc)) { - continue; - } - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); - runlen = opj_mqc_decode(mqc); - runlen = (runlen << 1) | opj_mqc_decode(mqc); - } else { - runlen = 0; - } - for (j = k + (OPJ_UINT32)runlen; j < k + 4 && j < t1->h; ++j) { - vsc = (j == k + 3 || j == t1->h - 1) ? 
1 : 0; - opj_t1_dec_clnpass_step_vsc( - t1, - &t1->flags[((j+1) * t1->flags_stride) + i + 1], - &t1->data[(j * t1->w) + i], - orient, - oneplushalf, - agg && (j == k + (OPJ_UINT32)runlen), - vsc); - } - } - } - } else { - OPJ_INT32 *data1 = t1->data; - opj_flag_t *flags1 = &t1->flags[1]; - for (k = 0; k < (t1->h & ~3u); k += 4) { - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - agg = !((MACRO_t1_flags(1 + k, 1 + i) | - MACRO_t1_flags(1 + k + 1, 1 + i) | - MACRO_t1_flags(1 + k + 2, 1 + i) | - MACRO_t1_flags(1 + k + 3, 1 + i)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); - if (agg) { - opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); - if (!opj_mqc_decode(mqc)) { - continue; - } - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); - runlen = opj_mqc_decode(mqc); - runlen = (runlen << 1) | opj_mqc_decode(mqc); - flags2 += (OPJ_UINT32)runlen * t1->flags_stride; - data2 += (OPJ_UINT32)runlen * t1->w; - for (j = (OPJ_UINT32)runlen; j < 4 && j < t1->h; ++j) { - flags2 += t1->flags_stride; - if (agg && (j == (OPJ_UINT32)runlen)) { - opj_t1_dec_clnpass_step_partial(t1, flags2, data2, orient, oneplushalf); - } else { - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); - } - data2 += t1->w; - } - } else { - opj_flag_t flag; - flags2 += t1->flags_stride; - flag = *flags2; - if (!(flag & (T1_SIG | T1_VISIT))) - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); - *flags2 &= ~T1_VISIT; - data2 += t1->w; - flags2 += t1->flags_stride; - flag = *flags2; - if (!(flag & (T1_SIG | T1_VISIT))) - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); - *flags2 &= ~T1_VISIT; - data2 += t1->w; - flags2 += t1->flags_stride; - flag = *flags2; - if (!(flag & (T1_SIG | T1_VISIT))) - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); - *flags2 &= ~T1_VISIT; - data2 += t1->w; - flags2 += t1->flags_stride; - flag = *flags2; - if (!(flag & 
(T1_SIG | T1_VISIT))) - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); - *flags2 &= ~T1_VISIT; - data2 += t1->w; - } - } - data1 += t1->w << 2; - flags1 += t1->flags_stride << 2; - } - for (i = 0; i < t1->w; ++i) { - OPJ_INT32 *data2 = data1 + i; - opj_flag_t *flags2 = flags1 + i; - for (j = k; j < t1->h; ++j) { - flags2 += t1->flags_stride; - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); - data2 += t1->w; - } - } - } + opj_t1_dec_clnpass_internal(t1, bpno, orient, cblksty, 64, 64, 66); +} - if (segsym) { - OPJ_INT32 v = 0; - opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); - v = opj_mqc_decode(mqc); - v = (v << 1) | opj_mqc_decode(mqc); - v = (v << 1) | opj_mqc_decode(mqc); - v = (v << 1) | opj_mqc_decode(mqc); - /* - if (v!=0xa) { - opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); - } - */ - } -} /* VSC and BYPASS by Antonin */ +static void opj_t1_dec_clnpass_generic( + opj_t1_t *t1, + OPJ_INT32 bpno, + OPJ_INT32 orient, + OPJ_INT32 cblksty) +{ + opj_t1_dec_clnpass_internal(t1, bpno, orient, cblksty, t1->w, t1->h, t1->flags_stride); +} /** mod fixed_quality */ @@ -1446,45 +1482,91 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } } - for (passno = 0; (passno < seg->real_num_passes) && (bpno_plus_one >= 1); ++passno) { - switch (passtype) { - case 0: - if (type == T1_TYPE_RAW) { - opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); - } else { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one, (OPJ_INT32)orient); - } else { - opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)orient); - } - } - break; - case 1: - if (type == T1_TYPE_RAW) { - opj_t1_dec_refpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); - } else { - if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one); - } else { - opj_t1_dec_refpass_mqc(t1, bpno_plus_one); - } - } - break; - case 2: - opj_t1_dec_clnpass(t1, 
bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); - break; - } + if( t1->w == 64 && t1->h == 64 ) + { + for (passno = 0; (passno < seg->real_num_passes) && (bpno_plus_one >= 1); ++passno) { + switch (passtype) { + case 0: + if (type == T1_TYPE_RAW) { + opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + } else { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one, (OPJ_INT32)orient); + } else { + opj_t1_dec_sigpass_mqc_64x64(t1, bpno_plus_one, (OPJ_INT32)orient); + } + } + break; + case 1: + if (type == T1_TYPE_RAW) { + opj_t1_dec_refpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); + } else { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one); + } else { + opj_t1_dec_refpass_mqc_64x64(t1, bpno_plus_one); + } + } + break; + case 2: + opj_t1_dec_clnpass_64x64(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + break; + } - if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) { - opj_mqc_resetstates(mqc); - opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); - opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); - opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); - } - if (++passtype == 3) { - passtype = 0; - bpno_plus_one--; - } + if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) { + opj_mqc_resetstates(mqc); + opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); + opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); + opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); + } + if (++passtype == 3) { + passtype = 0; + bpno_plus_one--; + } + } + } + else + { + for (passno = 0; (passno < seg->real_num_passes) && (bpno_plus_one >= 1); ++passno) { + switch (passtype) { + case 0: + if (type == T1_TYPE_RAW) { + opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + } else { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one, (OPJ_INT32)orient); + } else { + opj_t1_dec_sigpass_mqc_generic(t1, bpno_plus_one, (OPJ_INT32)orient); + } + } 
+ break; + case 1: + if (type == T1_TYPE_RAW) { + opj_t1_dec_refpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); + } else { + if (cblksty & J2K_CCP_CBLKSTY_VSC) { + opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one); + } else { + opj_t1_dec_refpass_mqc_generic(t1, bpno_plus_one); + } + } + break; + case 2: + opj_t1_dec_clnpass_generic(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + break; + } + + if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) { + opj_mqc_resetstates(mqc); + opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46); + opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3); + opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4); + } + if (++passtype == 3) { + passtype = 0; + bpno_plus_one--; + } + } } } return OPJ_TRUE; From ba1edf6cd41415594729bc90ad3b0008af48251e Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 22 May 2016 00:50:34 +0200 Subject: [PATCH 05/33] Reduce number of occurrences of orient function argument This is essentially used to shift inside the lut_ctxno_zc, which we can precompute at the beginning of opj_t1_decode_cblk() / opj_t1_encode_cblk() --- src/lib/openjp2/mqc.h | 1 + src/lib/openjp2/t1.c | 127 +++++++++++++++--------------------------- 2 files changed, 47 insertions(+), 81 deletions(-) diff --git a/src/lib/openjp2/mqc.h b/src/lib/openjp2/mqc.h index 574c599b..491ee50e 100644 --- a/src/lib/openjp2/mqc.h +++ b/src/lib/openjp2/mqc.h @@ -77,6 +77,7 @@ typedef struct opj_mqc { OPJ_BYTE *end; opj_mqc_state_t *ctxs[MQC_NUMCTXS]; opj_mqc_state_t **curctx; + const OPJ_BYTE *lut_ctxno_zc_orient; /* lut_ctxno_zc shifted by 256 * bandno */ #ifdef MQC_PERF_OPT unsigned char *buffer; #endif diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 07e358a5..30919585 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -45,7 +45,7 @@ /** @name Local static functions */ /*@{*/ -static INLINE OPJ_BYTE opj_t1_getctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient); +static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f); static 
OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f); static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f); static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); @@ -58,7 +58,6 @@ Encode significant pass static void opj_t1_enc_sigpass_step(opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, @@ -82,20 +81,17 @@ static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf, OPJ_INT32 vsc); static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf); static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf, OPJ_INT32 vsc); @@ -105,7 +101,6 @@ Encode significant pass */ static void opj_t1_enc_sigpass( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 cblksty); @@ -116,12 +111,10 @@ Decode significant pass static void opj_t1_dec_sigpass_raw( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_INT32 orient, OPJ_INT32 cblksty); static void opj_t1_dec_sigpass_mqc_vsc( opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 orient); + OPJ_INT32 bpno); @@ -202,7 +195,6 @@ static void opj_t1_enc_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, @@ -215,19 +207,16 @@ static void opj_t1_dec_clnpass_step_partial( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf); static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf); static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf, OPJ_INT32 partial, OPJ_INT32 vsc); @@ -237,7 +226,6 @@ Encode clean-up pass static void 
opj_t1_enc_clnpass( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_UINT32 cblksty); /** @@ -246,7 +234,6 @@ Decode clean-up pass static void opj_t1_dec_clnpass( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_INT32 orient, OPJ_INT32 cblksty); static OPJ_FLOAT64 opj_t1_getwmsedec( @@ -298,8 +285,8 @@ static OPJ_BOOL opj_t1_allocate_buffers( opj_t1_t *t1, /* ----------------------------------------------------------------------- */ -static OPJ_BYTE opj_t1_getctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient) { - return lut_ctxno_zc[(orient << 8) | (f & T1_SIG_OTH)]; +static OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f) { + return mqc->lut_ctxno_zc_orient[(f & T1_SIG_OTH)]; } static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f) { @@ -358,7 +345,6 @@ static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stri static void opj_t1_enc_sigpass_step( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, @@ -374,7 +360,7 @@ static void opj_t1_enc_sigpass_step( opj_t1_t *t1, flag = vsc ? (OPJ_UINT32)((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (OPJ_UINT32)(*flagsp); if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { v = (opj_int_abs(*datap) & one) ? 1 : 0; - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(flag, orient)); /* ESSAI */ + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); /* ESSAI */ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ opj_mqc_bypass_enc(mqc, (OPJ_UINT32)v); } else { @@ -400,14 +386,12 @@ static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf, OPJ_INT32 vsc) { OPJ_INT32 v, flag; opj_raw_t *raw = t1->raw; /* RAW component */ - OPJ_ARG_NOT_USED(orient); - + flag = vsc ? 
((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { if (opj_raw_decode(raw)) { @@ -423,7 +407,6 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf) { OPJ_INT32 v, flag; @@ -432,7 +415,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( flag = *flagsp; if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc((OPJ_UINT32)flag, (OPJ_UINT32)orient)); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); @@ -447,7 +430,6 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf, OPJ_INT32 vsc) { @@ -457,7 +439,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( flag = vsc ? 
((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc((OPJ_UINT32)flag, (OPJ_UINT32)orient)); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); @@ -472,7 +454,6 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( static void opj_t1_enc_sigpass(opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_BYTE type, OPJ_UINT32 cblksty @@ -491,7 +472,6 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->data[(j * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -505,7 +485,6 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, static void opj_t1_dec_sigpass_raw( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_INT32 orient, OPJ_INT32 cblksty) { OPJ_INT32 one, half, oneplushalf, vsc; @@ -521,7 +500,6 @@ static void opj_t1_dec_sigpass_raw( t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->data[(j * t1->w) + i], - orient, oneplushalf, vsc); } @@ -529,7 +507,7 @@ static void opj_t1_dec_sigpass_raw( } } /* VSC and BYPASS by Antonin */ -#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, orient, w, h, flags_stride) \ +#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf; \ OPJ_UINT32 i, j, k; \ @@ -543,16 +521,16 @@ static void opj_t1_dec_sigpass_raw( OPJ_INT32 *data2 = data1 + i; \ opj_flag_t *flags2 = flags1 + i; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ data2 += w; \ 
flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ data2 += w; \ } \ data1 += w << 2; \ @@ -563,7 +541,7 @@ static void opj_t1_dec_sigpass_raw( opj_flag_t *flags2 = flags1 + i; \ for (j = k; j < h; ++j) { \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ data2 += w; \ } \ } \ @@ -571,25 +549,22 @@ static void opj_t1_dec_sigpass_raw( static void opj_t1_dec_sigpass_mqc_64x64( opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 orient) + OPJ_INT32 bpno) { - opj_t1_dec_sigpass_mqc_internal(t1, bpno, orient, 64, 64, 66); + opj_t1_dec_sigpass_mqc_internal(t1, bpno, 64, 64, 66); } static void opj_t1_dec_sigpass_mqc_generic( opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 orient) + OPJ_INT32 bpno) { - opj_t1_dec_sigpass_mqc_internal(t1, bpno, orient, t1->w, t1->h, t1->flags_stride); + opj_t1_dec_sigpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride); } /* VSC and BYPASS by Antonin */ static void opj_t1_dec_sigpass_mqc_vsc( opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 orient) + OPJ_INT32 bpno) { OPJ_INT32 one, half, oneplushalf, vsc; OPJ_UINT32 i, j, k; @@ -604,7 +579,6 @@ static void opj_t1_dec_sigpass_mqc_vsc( t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->data[(j * t1->w) + i], - orient, oneplushalf, vsc); } @@ -850,7 +824,6 @@ static void opj_t1_enc_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_UINT32 orient, OPJ_INT32 bpno, OPJ_INT32 one, OPJ_INT32 *nmsedec, @@ -867,7 +840,7 @@ static void opj_t1_enc_clnpass_step( goto LABEL_PARTIAL; } if (!(*flagsp & (T1_SIG | T1_VISIT))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(flag, orient)); + 
opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); v = (opj_int_abs(*datap) & one) ? 1 : 0; opj_mqc_encode(mqc, (OPJ_UINT32)v); if (v) { @@ -886,14 +859,11 @@ static void opj_t1_dec_clnpass_step_partial( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf) { OPJ_INT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - OPJ_ARG_NOT_USED(orient); - flag = *flagsp; opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); @@ -906,7 +876,6 @@ static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf) { OPJ_INT32 v, flag; @@ -915,7 +884,7 @@ static void opj_t1_dec_clnpass_step( flag = *flagsp; if (!(flag & (T1_SIG | T1_VISIT))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc((OPJ_UINT32)flag, (OPJ_UINT32)orient)); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); @@ -930,7 +899,6 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf) { OPJ_INT32 v; @@ -941,7 +909,7 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( flag = *flagsp; /*if (!(flag & (T1_SIG | T1_VISIT)))*/ { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc((OPJ_UINT32)flag, (OPJ_UINT32)orient)); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); @@ -956,7 +924,6 @@ static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, - OPJ_INT32 orient, OPJ_INT32 oneplushalf, OPJ_INT32 partial, OPJ_INT32 vsc) @@ -970,7 +937,7 @@ static void 
opj_t1_dec_clnpass_step_vsc( goto LABEL_PARTIAL; } if (!(flag & (T1_SIG | T1_VISIT))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc((OPJ_UINT32)flag, (OPJ_UINT32)orient)); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { LABEL_PARTIAL: opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); @@ -985,7 +952,6 @@ LABEL_PARTIAL: static void opj_t1_enc_clnpass( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_UINT32 orient, OPJ_INT32 *nmsedec, OPJ_UINT32 cblksty) { @@ -1037,7 +1003,6 @@ static void opj_t1_enc_clnpass( t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], &t1->data[(j * t1->data_stride) + i], - orient, bpno, one, nmsedec, @@ -1050,7 +1015,7 @@ static void opj_t1_enc_clnpass( #define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)] -#define opj_t1_dec_clnpass_internal(t1, bpno, orient, cblksty, w, h, flags_stride) \ +#define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \ OPJ_UINT32 i, j, k; \ @@ -1090,7 +1055,6 @@ static void opj_t1_enc_clnpass( t1, \ &t1->flags[((j+1) * flags_stride) + i + 1], \ &t1->data[(j * w) + i], \ - orient, \ oneplushalf, \ agg && (j == k + (OPJ_UINT32)runlen), \ vsc); \ @@ -1121,9 +1085,9 @@ static void opj_t1_enc_clnpass( for (j = (OPJ_UINT32)runlen; j < 4 && j < h; ++j) { \ flags2 += flags_stride; \ if (agg && (j == (OPJ_UINT32)runlen)) { \ - opj_t1_dec_clnpass_step_partial(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_clnpass_step_partial(t1, flags2, data2, oneplushalf); \ } else { \ - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_clnpass_step(t1, flags2, data2, oneplushalf); \ } \ data2 += w; \ } \ @@ -1132,25 +1096,25 @@ static void opj_t1_enc_clnpass( flags2 += flags_stride; \ flag = *flags2; \ if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + 
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ flag = *flags2; \ if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ flag = *flags2; \ if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ flag = *flags2; \ if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ *flags2 &= ~T1_VISIT; \ data2 += w; \ } \ @@ -1163,7 +1127,7 @@ static void opj_t1_enc_clnpass( opj_flag_t *flags2 = flags1 + i; \ for (j = k; j < h; ++j) { \ flags2 += flags_stride; \ - opj_t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf); \ + opj_t1_dec_clnpass_step(t1, flags2, data2, oneplushalf); \ data2 += w; \ } \ } \ @@ -1187,19 +1151,17 @@ static void opj_t1_enc_clnpass( static void opj_t1_dec_clnpass_64x64( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_INT32 orient, OPJ_INT32 cblksty) { - opj_t1_dec_clnpass_internal(t1, bpno, orient, cblksty, 64, 64, 66); + opj_t1_dec_clnpass_internal(t1, bpno, cblksty, 64, 64, 66); } static void opj_t1_dec_clnpass_generic( opj_t1_t *t1, OPJ_INT32 bpno, - OPJ_INT32 orient, OPJ_INT32 cblksty) { - opj_t1_dec_clnpass_internal(t1, bpno, orient, cblksty, t1->w, t1->h, t1->flags_stride); + opj_t1_dec_clnpass_internal(t1, bpno, cblksty, t1->w, t1->h, t1->flags_stride); } @@ -1443,6 +1405,8 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, { opj_raw_t 
*raw = t1->raw; /* RAW component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */ + + mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; OPJ_INT32 bpno_plus_one; OPJ_UINT32 passtype; @@ -1488,12 +1452,12 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, switch (passtype) { case 0: if (type == T1_TYPE_RAW) { - opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); } else { if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one, (OPJ_INT32)orient); + opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one); } else { - opj_t1_dec_sigpass_mqc_64x64(t1, bpno_plus_one, (OPJ_INT32)orient); + opj_t1_dec_sigpass_mqc_64x64(t1, bpno_plus_one); } } break; @@ -1509,7 +1473,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } break; case 2: - opj_t1_dec_clnpass_64x64(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + opj_t1_dec_clnpass_64x64(t1, bpno_plus_one, (OPJ_INT32)cblksty); break; } @@ -1531,12 +1495,12 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, switch (passtype) { case 0: if (type == T1_TYPE_RAW) { - opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); } else { if (cblksty & J2K_CCP_CBLKSTY_VSC) { - opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one, (OPJ_INT32)orient); + opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one); } else { - opj_t1_dec_sigpass_mqc_generic(t1, bpno_plus_one, (OPJ_INT32)orient); + opj_t1_dec_sigpass_mqc_generic(t1, bpno_plus_one); } } break; @@ -1552,7 +1516,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } break; case 2: - opj_t1_dec_clnpass_generic(t1, bpno_plus_one, (OPJ_INT32)orient, (OPJ_INT32)cblksty); + opj_t1_dec_clnpass_generic(t1, bpno_plus_one, (OPJ_INT32)cblksty); break; } @@ -1695,6 +1659,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, OPJ_FLOAT64 cumwmsedec = 0.0; opj_mqc_t *mqc = t1->mqc; /* MQC component 
*/ + mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; OPJ_UINT32 passno; OPJ_INT32 bpno; @@ -1731,13 +1696,13 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, switch (passtype) { case 0: - opj_t1_enc_sigpass(t1, bpno, orient, &nmsedec, type, cblksty); + opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty); break; case 1: opj_t1_enc_refpass(t1, bpno, &nmsedec, type, cblksty); break; case 2: - opj_t1_enc_clnpass(t1, bpno, orient, &nmsedec, cblksty); + opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty); /* code switch SEGMARK (i.e. SEGSYM) */ if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) opj_mqc_segmark_enc(mqc); From 31882ad7f4a0d9d0231c3fdb9c75a6b69912e1b7 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 22 May 2016 00:54:06 +0200 Subject: [PATCH 06/33] Const'ify lut arrays so they are in the read-only data section --- src/lib/openjp2/t1_generate_luts.c | 14 +++++++------- src/lib/openjp2/t1_luts.h | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/lib/openjp2/t1_generate_luts.c b/src/lib/openjp2/t1_generate_luts.c index cba7245d..1e6e7b06 100644 --- a/src/lib/openjp2/t1_generate_luts.c +++ b/src/lib/openjp2/t1_generate_luts.c @@ -216,7 +216,7 @@ int main(int argc, char **argv) } } - printf("static OPJ_BYTE lut_ctxno_zc[1024] = {\n "); + printf("static const OPJ_BYTE lut_ctxno_zc[1024] = {\n "); for (i = 0; i < 1023; ++i) { printf("%i, ", lut_ctxno_zc[i]); if(!((i+1)&0x1f)) @@ -225,7 +225,7 @@ int main(int argc, char **argv) printf("%i\n};\n\n", lut_ctxno_zc[1023]); /* lut_ctxno_sc */ - printf("static OPJ_BYTE lut_ctxno_sc[256] = {\n "); + printf("static const OPJ_BYTE lut_ctxno_sc[256] = {\n "); for (i = 0; i < 255; ++i) { printf("0x%x, ", t1_init_ctxno_sc(i << 4)); if(!((i+1)&0xf)) @@ -234,7 +234,7 @@ int main(int argc, char **argv) printf("0x%x\n};\n\n", t1_init_ctxno_sc(255 << 4)); /* lut_spb */ - printf("static OPJ_BYTE lut_spb[256] = {\n "); + printf("static const OPJ_BYTE lut_spb[256] = {\n "); for (i = 0; i < 255; ++i) 
{ printf("%i, ", t1_init_spb(i << 4)); if(!((i+1)&0x1f)) @@ -268,16 +268,16 @@ int main(int argc, char **argv) (int) (floor((u * u) * pow(2, T1_NMSEDEC_FRACBITS) + 0.5) / pow(2, T1_NMSEDEC_FRACBITS) * 8192.0)); } - printf("static OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_sig, 1 << T1_NMSEDEC_BITS); - printf("static OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_sig0, 1 << T1_NMSEDEC_BITS); - printf("static OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_ref, 1 << T1_NMSEDEC_BITS); - printf("static OPJ_INT16 lut_nmsedec_ref0[1 << T1_NMSEDEC_BITS] = {\n "); + printf("static const OPJ_INT16 lut_nmsedec_ref0[1 << T1_NMSEDEC_BITS] = {\n "); dump_array16(lut_nmsedec_ref0, 1 << T1_NMSEDEC_BITS); return 0; diff --git a/src/lib/openjp2/t1_luts.h b/src/lib/openjp2/t1_luts.h index 37776b65..c66a8aeb 100644 --- a/src/lib/openjp2/t1_luts.h +++ b/src/lib/openjp2/t1_luts.h @@ -1,6 +1,6 @@ /* This file was automatically generated by t1_generate_luts.c */ -static OPJ_BYTE lut_ctxno_zc[1024] = { +static const OPJ_BYTE lut_ctxno_zc[1024] = { 0, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -35,7 +35,7 @@ static OPJ_BYTE lut_ctxno_zc[1024] = { 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8, 2, 5, 5, 7, 5, 7, 7, 8, 5, 7, 7, 8, 7, 8, 8, 8 }; -static OPJ_BYTE lut_ctxno_sc[256] = { +static const OPJ_BYTE lut_ctxno_sc[256] = { 0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xd, 0xc, 0xd, 0xd, 0xd, 0xd, 
0xd, 0x9, 0xa, 0xc, 0xb, 0xa, 0x9, 0xd, 0xc, 0xc, 0xb, 0xc, 0xb, 0xd, 0xc, 0xd, 0xc, 0x9, 0xa, 0xc, 0xb, 0xa, 0xa, 0xb, 0xb, 0xc, 0xd, 0x9, 0xa, 0xd, 0xd, 0xa, 0xa, @@ -54,7 +54,7 @@ static OPJ_BYTE lut_ctxno_sc[256] = { 0x9, 0xa, 0xc, 0xd, 0xa, 0xa, 0xd, 0xd, 0xc, 0xd, 0xc, 0xd, 0xd, 0xd, 0xd, 0xd }; -static OPJ_BYTE lut_spb[256] = { +static const OPJ_BYTE lut_spb[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -65,7 +65,7 @@ static OPJ_BYTE lut_spb[256] = { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; -static OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -84,7 +84,7 @@ static OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = { 0x6c00, 0x6d80, 0x6f00, 0x7080, 0x7200, 0x7380, 0x7500, 0x7680 }; -static OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0080, 0x0080, 0x0080, 0x0080, 0x0100, 0x0100, 0x0100, 0x0180, 0x0180, 0x0200, 0x0200, 0x0280, 0x0280, 0x0300, 0x0300, 0x0380, 0x0400, 0x0400, @@ -103,7 +103,7 @@ static OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = { 0x7080, 0x7280, 0x7480, 0x7600, 0x7800, 0x7a00, 0x7c00, 0x7e00 }; -static OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = { 0x1800, 0x1780, 0x1700, 0x1680, 0x1600, 0x1580, 0x1500, 0x1480, 0x1400, 0x1380, 0x1300, 0x1280, 0x1200, 0x1180, 0x1100, 0x1080, 
0x1000, 0x0f80, 0x0f00, 0x0e80, 0x0e00, 0x0d80, 0x0d00, 0x0c80, @@ -122,7 +122,7 @@ static OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = { 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780 }; -static OPJ_INT16 lut_nmsedec_ref0[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_ref0[1 << T1_NMSEDEC_BITS] = { 0x2000, 0x1f00, 0x1e00, 0x1d00, 0x1c00, 0x1b00, 0x1a80, 0x1980, 0x1880, 0x1780, 0x1700, 0x1600, 0x1500, 0x1480, 0x1380, 0x1300, 0x1200, 0x1180, 0x1080, 0x1000, 0x0f00, 0x0e80, 0x0e00, 0x0d00, From 1da397e94a4e441a7c9a1aa4c2debd1c06ba05e2 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 23 May 2016 01:33:06 +0200 Subject: [PATCH 07/33] Tier 1 decoding: add a colflags array Additional flag array such that colflags[1+0] is for state of col=0,row=0..3, colflags[1+1] for col=1, row=0..3, colflags[1+flags_stride] for col=0,row=4..7, ... This array avoids too much cache thrashing when processing by 4 vertical samples as done in the various decoding steps. --- src/lib/openjp2/t1.c | 400 +++++++++++++++++++++++++++++++------------ src/lib/openjp2/t1.h | 36 +++- 2 files changed, 327 insertions(+), 109 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 30919585..9ad6ffd0 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -39,6 +39,8 @@ #include "opj_includes.h" #include "t1_luts.h" +/* #define CONSISTENCY_CHECK */ + /** @defgroup T1 T1 - Implementation of the tier-1 coding */ /*@{*/ @@ -51,7 +53,7 @@ static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f); static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f); static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos); static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos); -static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride); +static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride); /** Encode significant pass */ @@ -80,20 +82,26 @@ static void
opj_t1_dec_sigpass_step(opj_t1_t *t1, static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 vsc); + OPJ_INT32 vsc, + OPJ_INT32 row); static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t* colflagsp, OPJ_INT32 *datap, - OPJ_INT32 oneplushalf); + OPJ_INT32 oneplushalf, + OPJ_INT32 row); static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 vsc); + OPJ_INT32 vsc, + OPJ_INT32 row); /** @@ -168,23 +176,28 @@ static void opj_t1_dec_refpass_step(opj_t1_t *t1, static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 vsc); + OPJ_INT32 row); static INLINE void opj_t1_dec_refpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, - OPJ_INT32 *datap, - OPJ_INT32 poshalf, - OPJ_INT32 neghalf); -static INLINE void opj_t1_dec_refpass_step_mqc_vsc( - opj_t1_t *t1, - opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 vsc); + OPJ_INT32 row); +static INLINE void opj_t1_dec_refpass_step_mqc_vsc( + opj_t1_t *t1, + opj_flag_t *flagsp, + opj_colflag_t *colflagsp, + OPJ_INT32 *datap, + OPJ_INT32 poshalf, + OPJ_INT32 neghalf, + OPJ_INT32 vsc, + OPJ_INT32 row); @@ -206,20 +219,26 @@ Decode clean-up pass static void opj_t1_dec_clnpass_step_partial( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, - OPJ_INT32 oneplushalf); + OPJ_INT32 oneplushalf, + OPJ_INT32 row); static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, - OPJ_INT32 oneplushalf); + OPJ_INT32 oneplushalf, + OPJ_INT32 row); static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, + 
opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 partial, - OPJ_INT32 vsc); + OPJ_INT32 vsc, + OPJ_INT32 row); /** Encode clean-up pass */ @@ -319,7 +338,7 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos) { return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)]; } -static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride) { +static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stride) { opj_flag_t *np = flagsp - stride; opj_flag_t *sp = flagsp + stride; @@ -342,6 +361,47 @@ static void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT32 stri sp[1] |= T1_SIG_NW; } +static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, opj_colflag_t *colflagsp, OPJ_UINT32 s, OPJ_UINT32 stride, OPJ_INT32 row) +{ + opj_t1_updateflags(flagsp, s, stride); + if( row == 0 ) + { + *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + *(colflagsp - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + *(colflagsp + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + *(colflagsp - stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3); + *(colflagsp - stride) |= (T1_COLFLAG_SIG_OTHER_ROW_3); + *(colflagsp - stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3); + } + else if( row == 3 ) + { + *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))); + *(colflagsp - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))); + *(colflagsp + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS* (row-1))); + *(colflagsp + stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0); + 
*(colflagsp + stride) |= (T1_COLFLAG_SIG_OTHER_ROW_0); + *(colflagsp + stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0); + } + else + { + *(colflagsp - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + *(colflagsp + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + } +} + static void opj_t1_enc_sigpass_step( opj_t1_t *t1, opj_flag_t *flagsp, OPJ_INT32 *datap, @@ -385,68 +445,88 @@ static void opj_t1_enc_sigpass_step( opj_t1_t *t1, static INLINE void opj_t1_dec_sigpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 vsc) + OPJ_INT32 vsc, + OPJ_INT32 row) { OPJ_INT32 v, flag; opj_raw_t *raw = t1->raw; /* RAW component */ flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); - if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { + if ((flag & T1_SIG_OTH) && !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) { if (opj_raw_decode(raw)) { v = (OPJ_INT32)opj_raw_decode(raw); /* ESSAI */ *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); } +#ifdef CONSISTENCY_CHECK *flagsp |= T1_VISIT; +#endif + *colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); } } static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t* colflagsp, OPJ_INT32 *datap, - OPJ_INT32 oneplushalf) + OPJ_INT32 oneplushalf, + OPJ_INT32 row) { OPJ_INT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - - flag = *flagsp; - if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { +#ifdef CONSISTENCY_CHECK + assert( ((*flagsp & T1_SIG_OTH) && !(*flagsp & (T1_SIG | T1_VISIT))) == + ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) == + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row))) ); +#endif + if( (*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) == + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) ) { + flag = *flagsp; opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); } +#ifdef CONSISTENCY_CHECK *flagsp |= T1_VISIT; +#endif + *colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 vsc) + OPJ_INT32 vsc, + OPJ_INT32 row) { OPJ_INT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); - if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) { + if ((flag & T1_SIG_OTH) && !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); } +#ifdef CONSISTENCY_CHECK *flagsp |= T1_VISIT; +#endif + *colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ @@ -489,21 +569,26 @@ static void opj_t1_dec_sigpass_raw( { OPJ_INT32 one, half, oneplushalf, vsc; OPJ_UINT32 i, j, k; + opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; one = 1 << bpno; half = one >> 1; oneplushalf = one | half; for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { + opj_colflag_t *colflags2 = colflags1 + i; for (j = k; j < k + 4 && j < t1->h; ++j) { vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 
1 : 0; opj_t1_dec_sigpass_step_raw( t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], + colflags2, &t1->data[(j * t1->w) + i], oneplushalf, - vsc); + vsc, + j - k); } } + colflags1 += t1->flags_stride; } } /* VSC and BYPASS by Antonin */ @@ -513,6 +598,7 @@ static void opj_t1_dec_sigpass_raw( OPJ_UINT32 i, j, k; \ OPJ_INT32 *data1 = t1->data; \ opj_flag_t *flags1 = &t1->flags[1]; \ + opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ one = 1 << bpno; \ half = one >> 1; \ oneplushalf = one | half; \ @@ -520,28 +606,32 @@ static void opj_t1_dec_sigpass_raw( for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ opj_flag_t *flags2 = flags1 + i; \ + opj_colflag_t *colflags2 = colflags1 + i; \ + if( *colflags2 == 0 ) continue; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3); \ data2 += w; \ } \ data1 += w << 2; \ flags1 += flags_stride << 2; \ + colflags1 += flags_stride; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ opj_flag_t *flags2 = flags1 + i; \ + opj_colflag_t *colflags2 = colflags1 + i; \ for (j = k; j < h; ++j) { \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, data2, oneplushalf); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, j - k); \ data2 += w; \ } \ } \ @@ -568,21 +658,26 @@ static void 
opj_t1_dec_sigpass_mqc_vsc( { OPJ_INT32 one, half, oneplushalf, vsc; OPJ_UINT32 i, j, k; + opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; one = 1 << bpno; half = one >> 1; oneplushalf = one | half; for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { + opj_colflag_t *colflags2 = colflags1 + i; for (j = k; j < k + 4 && j < t1->h; ++j) { vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0; opj_t1_dec_sigpass_step_mqc_vsc( t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], + colflags2, &t1->data[(j * t1->w) + i], oneplushalf, - vsc); + vsc, + j - k); } } + colflags1 += t1->flags_stride; } } /* VSC and BYPASS by Antonin */ @@ -619,64 +714,81 @@ static void opj_t1_enc_refpass_step( opj_t1_t *t1, static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 vsc) + OPJ_INT32 row) { - OPJ_INT32 v, t, flag; + OPJ_INT32 v, t; opj_raw_t *raw = t1->raw; /* RAW component */ - flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); - if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { + if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == + ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { v = (OPJ_INT32)opj_raw_decode(raw); t = v ? poshalf : neghalf; *datap += *datap < 0 ? 
-t : t; - *flagsp |= T1_REFINE; + *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ static INLINE void opj_t1_dec_refpass_step_mqc( opj_t1_t *t1, +#ifdef CONSISTENCY_CHECK opj_flag_t *flagsp, +#else + opj_flag_t *flagsp_unused, +#endif + opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, - OPJ_INT32 neghalf) + OPJ_INT32 neghalf, + OPJ_INT32 row) { - OPJ_INT32 v, t, flag; + OPJ_INT32 v, t; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - - flag = *flagsp; - if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag((OPJ_UINT32)flag)); /* ESSAI */ +#ifdef CONSISTENCY_CHECK + assert( ((*flagsp & (T1_SIG | T1_VISIT)) == T1_SIG) == + ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) ); +#endif + if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == + ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { + OPJ_UINT32 tmp1 = (*colflagsp & (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; + OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2 : tmp1; + opj_mqc_setcurctx(mqc, tmp2); /* ESSAI */ v = opj_mqc_decode(mqc); t = v ? poshalf : neghalf; *datap += *datap < 0 ? -t : t; - *flagsp |= T1_REFINE; + *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 vsc) + OPJ_INT32 vsc, + OPJ_INT32 row) { OPJ_INT32 v, t, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - flag = vsc ? 
((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); - if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag((OPJ_UINT32)flag)); /* ESSAI */ + if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == + ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { + OPJ_INT32 flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); + OPJ_UINT32 tmp1 = (flag & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; + OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2 : tmp1; + opj_mqc_setcurctx(mqc, tmp2); /* ESSAI */ v = opj_mqc_decode(mqc); t = v ? poshalf : neghalf; *datap += *datap < 0 ? -t : t; - *flagsp |= T1_REFINE; + *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ @@ -719,22 +831,24 @@ static void opj_t1_dec_refpass_raw( OPJ_INT32 one, poshalf, neghalf; OPJ_UINT32 i, j, k; OPJ_INT32 vsc; + opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? -poshalf : -1; for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { + opj_colflag_t *colflags2 = colflags1 + i; for (j = k; j < k + 4 && j < t1->h; ++j) { - vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0; opj_t1_dec_refpass_step_raw( t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], + colflags2, &t1->data[(j * t1->w) + i], poshalf, - neghalf, - vsc); + neghalf, j - k); } } + colflags1 += t1->flags_stride; } } /* VSC and BYPASS by Antonin */ @@ -744,6 +858,7 @@ static void opj_t1_dec_refpass_raw( OPJ_UINT32 i, j, k; \ OPJ_INT32 *data1 = t1->data; \ opj_flag_t *flags1 = &t1->flags[1]; \ + opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ one = 1 << bpno; \ poshalf = one >> 1; \ neghalf = bpno > 0 ? 
-poshalf : -1; \ @@ -751,28 +866,32 @@ static void opj_t1_dec_refpass_raw( for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ opj_flag_t *flags2 = flags1 + i; \ + opj_colflag_t *colflags2 = colflags1 + i; \ + if( *colflags2 == 0 ) continue; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 0); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 1); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 2); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 3); \ data2 += w; \ } \ data1 += w << 2; \ flags1 += flags_stride << 2; \ + colflags1 += flags_stride; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ opj_flag_t *flags2 = flags1 + i; \ + opj_colflag_t *colflags2 = colflags1 + i; \ for (j = k; j < h; ++j) { \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, data2, poshalf, neghalf); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, j - k); \ data2 += w; \ } \ } \ @@ -800,22 +919,26 @@ static void opj_t1_dec_refpass_mqc_vsc( OPJ_INT32 one, poshalf, neghalf; OPJ_UINT32 i, j, k; OPJ_INT32 vsc; + opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? -poshalf : -1; for (k = 0; k < t1->h; k += 4) { for (i = 0; i < t1->w; ++i) { + opj_colflag_t *colflags2 = colflags1 + i; for (j = k; j < k + 4 && j < t1->h; ++j) { vsc = ((j == k + 3 || j == t1->h - 1)) ? 
1 : 0; opj_t1_dec_refpass_step_mqc_vsc( t1, &t1->flags[((j+1) * t1->flags_stride) + i + 1], + colflags2, &t1->data[(j * t1->w) + i], poshalf, neghalf, - vsc); + vsc, j - k); } } + colflags1 += t1->flags_stride; } } /* VSC and BYPASS by Antonin */ @@ -858,8 +981,10 @@ LABEL_PARTIAL: static void opj_t1_dec_clnpass_step_partial( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, - OPJ_INT32 oneplushalf) + OPJ_INT32 oneplushalf, + OPJ_INT32 row) { OPJ_INT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ @@ -868,38 +993,48 @@ static void opj_t1_dec_clnpass_step_partial( opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); +#ifdef CONSISTENCY_CHECK *flagsp &= ~T1_VISIT; +#endif } /* VSC and BYPASS by Antonin */ static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, - OPJ_INT32 oneplushalf) + OPJ_INT32 oneplushalf, + OPJ_INT32 row) { OPJ_INT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - - flag = *flagsp; - if (!(flag & (T1_SIG | T1_VISIT))) { +#ifdef CONSISTENCY_CHECK + assert( (!(*flagsp & (T1_SIG | T1_VISIT))) == (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (4*row)))) ); +#endif + if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (4*row)))) { + flag = *flagsp; opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); } } +#ifdef CONSISTENCY_CHECK *flagsp &= ~T1_VISIT; +#endif } /* VSC and BYPASS by Antonin */ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, - OPJ_INT32 oneplushalf) + OPJ_INT32 oneplushalf, + OPJ_INT32 row) { OPJ_INT32 v; OPJ_INT32 flag; @@ -914,7 +1049,7 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, v, t1->flags_stride); + opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); } } /*flagsp &= ~T1_VISIT;*/ @@ -923,10 +1058,12 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 partial, - OPJ_INT32 vsc) + OPJ_INT32 vsc, + OPJ_INT32 row) { OPJ_INT32 v, flag; @@ -936,17 +1073,19 @@ static void opj_t1_dec_clnpass_step_vsc( if (partial) { goto LABEL_PARTIAL; } - if (!(flag & (T1_SIG | T1_VISIT))) { + if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) { opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); if (opj_mqc_decode(mqc)) { LABEL_PARTIAL: opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); + opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); } } +#ifdef CONSISTENCY_CHECK *flagsp &= ~T1_VISIT; +#endif } static void opj_t1_enc_clnpass( @@ -1015,7 +1154,7 @@ static void opj_t1_enc_clnpass( #define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)] -#define opj_t1_dec_clnpass_internal(t1, bpno, cblksty, w, h, flags_stride) \ +#define opj_t1_dec_clnpass_internal(consistency_check, t1, bpno, cblksty, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \ OPJ_UINT32 i, j, k; \ @@ -1027,14 +1166,17 @@ static void opj_t1_enc_clnpass( half = one >> 1; \ oneplushalf = one | half; \ if (cblksty & J2K_CCP_CBLKSTY_VSC) { \ + opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ for (k = 0; k < h; k += 4) { \ for (i = 0; i < w; ++i) { \ + opj_colflag_t *colflags2 = colflags1 + i; \ if (k + 3 < h) { \ - agg = !(MACRO_t1_flags_internal(1 + k,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ - || MACRO_t1_flags_internal(1 + k + 1,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ - || MACRO_t1_flags_internal(1 + k + 2,1 + i,flags_stride) & (T1_SIG | T1_VISIT | T1_SIG_OTH) \ - || (MACRO_t1_flags_internal(1 + k + 3,1 + i,flags_stride) \ - & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); \ + agg = !((*colflags2 & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \ + T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \ + T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \ + T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3)) || \ + ((MACRO_t1_flags_internal(1 + k + 3,1 + i,flags_stride) \ + & ((~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG_OTH)))); \ } else { \ agg = 0; \ } \ @@ -1054,24 +1196,36 @@ static void opj_t1_enc_clnpass( opj_t1_dec_clnpass_step_vsc( \ 
t1, \ &t1->flags[((j+1) * flags_stride) + i + 1], \ + colflags2, \ &t1->data[(j * w) + i], \ oneplushalf, \ agg && (j == k + (OPJ_UINT32)runlen), \ - vsc); \ + vsc, j - k); \ } \ + *colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ } \ + colflags1 += flags_stride; \ } \ } else { \ OPJ_INT32 *data1 = t1->data; \ opj_flag_t *flags1 = &t1->flags[1]; \ + opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \ for (k = 0; k < (h & ~3u); k += 4) { \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ opj_flag_t *flags2 = flags1 + i; \ - agg = !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \ - MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \ - MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \ - MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)); \ + opj_colflag_t *colflags2 = colflags1 + i; \ + opj_colflag_t colflags = *colflags2; \ + agg = !(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \ + T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \ + T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \ + T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3 | T1_COLFLAG_SIG_OTHER_ROW_3)); \ + if( consistency_check ) { \ + assert( agg == !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \ + MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \ + MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \ + MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)) ); \ + } \ if (agg) { \ opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \ if (!opj_mqc_decode(mqc)) { \ @@ -1084,52 +1238,59 @@ static void opj_t1_enc_clnpass( data2 += (OPJ_UINT32)runlen * w; \ for (j = (OPJ_UINT32)runlen; j < 4 && j < h; ++j) { \ flags2 += flags_stride; \ - if (agg && (j == (OPJ_UINT32)runlen)) { \ - 
opj_t1_dec_clnpass_step_partial(t1, flags2, data2, oneplushalf); \ + if (j == (OPJ_UINT32)runlen) { \ + opj_t1_dec_clnpass_step_partial(t1, flags2, colflags2, data2, oneplushalf, j); \ } else { \ - opj_t1_dec_clnpass_step(t1, flags2, data2, oneplushalf); \ + opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j); \ } \ data2 += w; \ } \ } else { \ - opj_flag_t flag; \ flags2 += flags_stride; \ - flag = *flags2; \ - if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ - *flags2 &= ~T1_VISIT; \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0); \ + } \ + if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ - flag = *flags2; \ - if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ - *flags2 &= ~T1_VISIT; \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1); \ + } \ + if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ - flag = *flags2; \ - if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ - *flags2 &= ~T1_VISIT; \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) {\ + 
opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2); \ + } \ + if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ - flag = *flags2; \ - if (!(flag & (T1_SIG | T1_VISIT))) \ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, data2, oneplushalf); \ - *flags2 &= ~T1_VISIT; \ + if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ + if (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3); \ + } \ + if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ } \ + *colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ } \ data1 += w << 2; \ flags1 += flags_stride << 2; \ + colflags1 += flags_stride; \ } \ for (i = 0; i < w; ++i) { \ OPJ_INT32 *data2 = data1 + i; \ opj_flag_t *flags2 = flags1 + i; \ + opj_colflag_t *colflags2 = colflags1 + i; \ for (j = k; j < h; ++j) { \ flags2 += flags_stride; \ - opj_t1_dec_clnpass_step(t1, flags2, data2, oneplushalf); \ + opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j - k); \ data2 += w; \ } \ + *colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ } \ } \ \ @@ -1153,7 +1314,11 @@ static void opj_t1_dec_clnpass_64x64( OPJ_INT32 bpno, OPJ_INT32 cblksty) { - opj_t1_dec_clnpass_internal(t1, bpno, cblksty, 64, 64, 66); +#ifdef CONSISTENCY_CHECK + opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, 64, 64, 66); +#else + opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, 64, 64, 66); +#endif } static void opj_t1_dec_clnpass_generic( @@ -1161,7 +1326,11 @@ static void opj_t1_dec_clnpass_generic( OPJ_INT32 bpno, OPJ_INT32 cblksty) { - opj_t1_dec_clnpass_internal(t1, bpno, cblksty, t1->w, 
t1->h, t1->flags_stride); +#ifdef CONSISTENCY_CHECK + opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, t1->w, t1->h, t1->flags_stride); +#else + opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, t1->w, t1->h, t1->flags_stride); +#endif } @@ -1234,6 +1403,21 @@ static OPJ_BOOL opj_t1_allocate_buffers( t1->flagssize=flagssize; } memset(t1->flags,0,flagssize * sizeof(opj_flag_t)); + + if (!t1->encoder) { + OPJ_UINT32 colflags_size=t1->flags_stride * ((h+3) / 4 + 2); + + if(colflags_size > t1->colflags_size){ + opj_aligned_free(t1->colflags); + t1->colflags = (opj_colflag_t*) opj_aligned_malloc(colflags_size * sizeof(opj_colflag_t)); + if(!t1->colflags){ + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + t1->colflags_size=colflags_size; + } + memset(t1->colflags,0,colflags_size * sizeof(opj_colflag_t)); + } t1->w=w; t1->h=h; @@ -1304,6 +1488,10 @@ void opj_t1_destroy(opj_t1_t *p_t1) p_t1->flags = 00; } + if (p_t1->colflags) { + opj_aligned_free(p_t1->colflags); + p_t1->colflags = 00; + } opj_free(p_t1); } diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index e9d3db57..22557d96 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -52,6 +52,7 @@ in T1.C are used by some function in TCD.C. /* CAUTION: the value of those constants must not be changed, otherwise the */ /* optimization of opj_t1_updateflags() will break! */ +/* BEGINNING of flags that apply to opj_flag_t */ #define T1_SIG_NE 0x0001 /**< Context orientation : North-East direction */ #define T1_SIG_SE 0x0002 /**< Context orientation : South-East direction */ #define T1_SIG_SW 0x0004 /**< Context orientation : South-West direction */ @@ -69,9 +70,10 @@ in T1.C are used by some function in TCD.C. 
#define T1_SGN_W 0x0800 #define T1_SGN (T1_SGN_N|T1_SGN_E|T1_SGN_S|T1_SGN_W) -#define T1_SIG 0x1000 -#define T1_REFINE 0x2000 -#define T1_VISIT 0x4000 +#define T1_SIG 0x1000 /**< No longer used by decoder */ +#define T1_REFINE 0x2000 /**< No longer used by decoder */ +#define T1_VISIT 0x4000 /**< No longer used by decoder */ +/* END of flags that apply to opj_flag_t */ #define T1_NUMCTXS_ZC 9 #define T1_NUMCTXS_SC 5 @@ -91,10 +93,32 @@ in T1.C are used by some function in TCD.C. #define T1_TYPE_MQ 0 /**< Normal coding using entropy coder */ #define T1_TYPE_RAW 1 /**< No encoding the information is store under raw format in codestream (mode switch RAW)*/ +/* Those flags are used by opj_colflag_t */ +#define T1_COLFLAG_RBS 4 /* RBS = Row Bit Shift */ +#define T1_COLFLAG_SIG_OTHER_ROW_0 (1 << 0) /**< This sample has at least one significant neighbour */ +#define T1_COLFLAG_SIG_ROW_0 (1 << 1) /**< This sample is significant */ +#define T1_COLFLAG_VISIT_ROW_0 (1 << 2) /**< This sample has been visited */ +#define T1_COLFLAG_REFINE_ROW_0 (1 << 3) /**< This sample has been refined */ +#define T1_COLFLAG_SIG_OTHER_ROW_1 (T1_COLFLAG_SIG_OTHER_ROW_0 << T1_COLFLAG_RBS) +#define T1_COLFLAG_SIG_ROW_1 (T1_COLFLAG_SIG_ROW_0 << T1_COLFLAG_RBS) +#define T1_COLFLAG_VISIT_ROW_1 (T1_COLFLAG_VISIT_ROW_0 << T1_COLFLAG_RBS) +#define T1_COLFLAG_REFINE_ROW_1 (T1_COLFLAG_REFINE_ROW_0 << T1_COLFLAG_RBS) +#define T1_COLFLAG_SIG_OTHER_ROW_2 (T1_COLFLAG_SIG_OTHER_ROW_0 << (2*T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_ROW_2 (T1_COLFLAG_SIG_ROW_0 << (2*T1_COLFLAG_RBS)) +#define T1_COLFLAG_VISIT_ROW_2 (T1_COLFLAG_VISIT_ROW_0 << (2*T1_COLFLAG_RBS)) +#define T1_COLFLAG_REFINE_ROW_2 (T1_COLFLAG_REFINE_ROW_0 << (2*T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_OTHER_ROW_3 (T1_COLFLAG_SIG_OTHER_ROW_0 << (3*T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_ROW_3 (T1_COLFLAG_SIG_ROW_0 << (3*T1_COLFLAG_RBS)) +#define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3*T1_COLFLAG_RBS)) +#define T1_COLFLAG_REFINE_ROW_3 
(T1_COLFLAG_REFINE_ROW_0 << (3*T1_COLFLAG_RBS)) + /* ----------------------------------------------------------------------- */ typedef OPJ_INT16 opj_flag_t; +/** Flags for 4 consecutive rows of a column */ +typedef OPJ_UINT16 opj_colflag_t; + /** Tier-1 coding (coding of code-block coefficients) */ @@ -107,11 +131,17 @@ typedef struct opj_t1 { OPJ_INT32 *data; opj_flag_t *flags; + /** Addition flag array such that colflags[1+0] is for state of col=0,row=0..3, + colflags[1+1] for col=1, row=0..3, colflags[1+flags_stride] for col=0,row=4..7, ... + This array avoids too much cache trashing when processing by 4 vertical samples + as done in the various decoding steps. */ + opj_colflag_t* colflags; OPJ_UINT32 w; OPJ_UINT32 h; OPJ_UINT32 datasize; OPJ_UINT32 flagssize; OPJ_UINT32 flags_stride; + OPJ_UINT32 colflags_size; OPJ_UINT32 data_stride; OPJ_BOOL encoder; } opj_t1_t; From 93f7f907117675cf6af227b4d0243a58f6c3640a Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 23 May 2016 10:25:55 +0200 Subject: [PATCH 08/33] opj_t1_decode_cblks(): tiny perf increase when loop unrolling --- src/lib/openjp2/t1.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 9ad6ffd0..89a7ff85 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1559,7 +1559,18 @@ OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1, if (tccp->qmfbid == 1) { OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { + i = 0; + for (; i < (cblk_w & ~3); i += 4) { + OPJ_INT32 tmp0 = datap[(j * cblk_w) + i]; + OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1]; + OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2]; + OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3]; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = 
tmp3/2; + } + for (; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2; } From 956c31d5a6e4530a92b6dd6099bdbf071144f6f1 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 23 May 2016 10:34:44 +0200 Subject: [PATCH 09/33] opj_t1_dec_clnpass(): remove useless test in the runlen decoding path (of the non VSC case) --- src/lib/openjp2/t1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 89a7ff85..a2c5dc6a 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1236,7 +1236,7 @@ static void opj_t1_enc_clnpass( runlen = (runlen << 1) | opj_mqc_decode(mqc); \ flags2 += (OPJ_UINT32)runlen * flags_stride; \ data2 += (OPJ_UINT32)runlen * w; \ - for (j = (OPJ_UINT32)runlen; j < 4 && j < h; ++j) { \ + for (j = (OPJ_UINT32)runlen; j < 4; ++j) { \ flags2 += flags_stride; \ if (j == (OPJ_UINT32)runlen) { \ opj_t1_dec_clnpass_step_partial(t1, flags2, colflags2, data2, oneplushalf, j); \ From 8371491a9968a31ce16d6ce37b775ef3c7d090c8 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 23 May 2016 11:44:54 +0200 Subject: [PATCH 10/33] Better inlining of opj_t1_updateflagscolflags() w.r.t. 
flags_stride --- src/lib/openjp2/t1.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index a2c5dc6a..0023ad74 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -93,7 +93,8 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row); + OPJ_INT32 row, + OPJ_INT32 flags_stride); static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -474,7 +475,8 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row) + OPJ_INT32 row, + OPJ_INT32 flags_stride) { OPJ_INT32 v, flag; @@ -492,7 +494,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, flags_stride, row); } #ifdef CONSISTENCY_CHECK *flagsp |= T1_VISIT; @@ -609,16 +611,16 @@ static void opj_t1_dec_sigpass_raw( opj_colflag_t *colflags2 = colflags1 + i; \ if( *colflags2 == 0 ) continue; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0, flags_stride); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1, flags_stride); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2, flags_stride); \ data2 += w; \ flags2 += 
flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3, flags_stride); \ data2 += w; \ } \ data1 += w << 2; \ @@ -631,7 +633,7 @@ static void opj_t1_dec_sigpass_raw( opj_colflag_t *colflags2 = colflags1 + i; \ for (j = k; j < h; ++j) { \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, j - k); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, j - k, flags_stride); \ data2 += w; \ } \ } \ @@ -1034,7 +1036,8 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row) + OPJ_INT32 row, + OPJ_INT32 flags_stride) { OPJ_INT32 v; OPJ_INT32 flag; @@ -1049,7 +1052,7 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); + opj_t1_updateflagscolflags(flagsp, colflagsp, v, flags_stride, row); } } /*flagsp &= ~T1_VISIT;*/ @@ -1249,28 +1252,28 @@ static void opj_t1_enc_clnpass( flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0, flags_stride); \ } \ if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1, flags_stride); \ } \ if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2, flags_stride); \ } \ if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if 
(!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3, flags_stride); \ } \ if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ From 107eb31531ca688e2799406e69e9383efc13448f Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 23 May 2016 13:45:15 +0200 Subject: [PATCH 11/33] Improve perf of opj_t1_dec_sigpass_mqc_vsc() and opj_t1_dec_refpass_mqc_vsc() with loop unrolling --- src/lib/openjp2/t1.c | 92 ++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 0023ad74..277261d7 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -660,27 +660,48 @@ static void opj_t1_dec_sigpass_mqc_vsc( { OPJ_INT32 one, half, oneplushalf, vsc; OPJ_UINT32 i, j, k; - opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; + OPJ_INT32 *data1 = t1->data; + opj_flag_t *flags1 = &t1->flags[1]; + opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; one = 1 << bpno; half = one >> 1; oneplushalf = one | half; - for (k = 0; k < t1->h; k += 4) { + for (k = 0; k < (t1->h & ~3); k += 4) { for (i = 0; i < t1->w; ++i) { + OPJ_INT32 *data2 = data1 + i; + opj_flag_t *flags2 = flags1 + i; opj_colflag_t *colflags2 = colflags1 + i; - for (j = k; j < k + 4 && j < t1->h; ++j) { - vsc = (j == k + 3 || j == t1->h - 1) ? 
1 : 0; - opj_t1_dec_sigpass_step_mqc_vsc( - t1, - &t1->flags[((j+1) * t1->flags_stride) + i + 1], - colflags2, - &t1->data[(j * t1->w) + i], - oneplushalf, - vsc, - j - k); - } + flags2 += t1->flags_stride; + opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 0); + data2 += t1->w; + flags2 += t1->flags_stride; + opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 1); + data2 += t1->w; + flags2 += t1->flags_stride; + opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 2); + data2 += t1->w; + flags2 += t1->flags_stride; + opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 1, 3); + data2 += t1->w; } + data1 += t1->w << 2; + flags1 += t1->flags_stride << 2; colflags1 += t1->flags_stride; } + for (i = 0; i < t1->w; ++i) { + opj_colflag_t *colflags2 = colflags1 + i; + for (j = k; j < t1->h; ++j) { + vsc = (j == t1->h - 1) ? 1 : 0; + opj_t1_dec_sigpass_step_mqc_vsc( + t1, + &t1->flags[((j+1) * t1->flags_stride) + i + 1], + colflags2, + &t1->data[(j * t1->w) + i], + oneplushalf, + vsc, + j - k); + } + } } /* VSC and BYPASS by Antonin */ @@ -921,27 +942,48 @@ static void opj_t1_dec_refpass_mqc_vsc( OPJ_INT32 one, poshalf, neghalf; OPJ_UINT32 i, j, k; OPJ_INT32 vsc; + OPJ_INT32 *data1 = t1->data; + opj_flag_t *flags1 = &t1->flags[1]; opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? -poshalf : -1; - for (k = 0; k < t1->h; k += 4) { + for (k = 0; k < (t1->h & ~3); k += 4) { for (i = 0; i < t1->w; ++i) { + OPJ_INT32 *data2 = data1 + i; + opj_flag_t *flags2 = flags1 + i; opj_colflag_t *colflags2 = colflags1 + i; - for (j = k; j < k + 4 && j < t1->h; ++j) { - vsc = ((j == k + 3 || j == t1->h - 1)) ? 
1 : 0; - opj_t1_dec_refpass_step_mqc_vsc( - t1, - &t1->flags[((j+1) * t1->flags_stride) + i + 1], - colflags2, - &t1->data[(j * t1->w) + i], - poshalf, - neghalf, - vsc, j - k); - } + flags2 += t1->flags_stride; + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 0); + data2 += t1->w; + flags2 += t1->flags_stride; + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 1); + data2 += t1->w; + flags2 += t1->flags_stride; + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 2); + data2 += t1->w; + flags2 += t1->flags_stride; + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 1, 3); + data2 += t1->w; } + data1 += t1->w << 2; + flags1 += t1->flags_stride << 2; colflags1 += t1->flags_stride; } + for (i = 0; i < t1->w; ++i) { + opj_colflag_t *colflags2 = colflags1 + i; + for (j = k; j < t1->h; ++j) { + vsc = (j == t1->h - 1) ? 1 : 0; + opj_t1_dec_refpass_step_mqc_vsc( + t1, + &t1->flags[((j+1) * t1->flags_stride) + i + 1], + colflags2, + &t1->data[(j * t1->w) + i], + poshalf, neghalf, + vsc, + j - k); + } + } } /* VSC and BYPASS by Antonin */ From 7092f7ea112fcc44e7426c462bf01a406b076620 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 23 May 2016 16:00:04 +0200 Subject: [PATCH 12/33] Fix MSVC210 build issue (use of C99 declaration after statement) introduced in ba1edf6cd41415594729bc90ad3b0008af48251e --- src/lib/openjp2/t1.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 277261d7..e1097bf5 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1650,13 +1650,13 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, opj_raw_t *raw = t1->raw; /* RAW component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */ - mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; - OPJ_INT32 bpno_plus_one; OPJ_UINT32 passtype; OPJ_UINT32 segno, passno; OPJ_BYTE type = 
T1_TYPE_MQ; /* BYPASS mode */ + mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; + if(!opj_t1_allocate_buffers( t1, (OPJ_UINT32)(cblk->x1 - cblk->x0), @@ -1903,7 +1903,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, OPJ_FLOAT64 cumwmsedec = 0.0; opj_mqc_t *mqc = t1->mqc; /* MQC component */ - mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; OPJ_UINT32 passno; OPJ_INT32 bpno; @@ -1914,6 +1913,8 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, OPJ_BYTE type = T1_TYPE_MQ; OPJ_FLOAT64 tempwmsedec; + mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256; + max = 0; for (i = 0; i < t1->w; ++i) { for (j = 0; j < t1->h; ++j) { From 54179fe1d53156c6b440166fe71a10f238a6ea56 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 25 May 2016 16:34:52 +0200 Subject: [PATCH 13/33] Add threading and thread pool API --- src/lib/openjp2/CMakeLists.txt | 40 ++ src/lib/openjp2/openjpeg.h | 13 + src/lib/openjp2/opj_includes.h | 2 + src/lib/openjp2/thread.c | 959 +++++++++++++++++++++++++++++++++ src/lib/openjp2/thread.h | 253 +++++++++ 5 files changed, 1267 insertions(+) create mode 100644 src/lib/openjp2/thread.c create mode 100644 src/lib/openjp2/thread.h diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt index c02a9948..f45ceb34 100644 --- a/src/lib/openjp2/CMakeLists.txt +++ b/src/lib/openjp2/CMakeLists.txt @@ -9,6 +9,8 @@ include_directories( ) # Defines the source code for the library set(OPENJPEG_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/thread.c + ${CMAKE_CURRENT_SOURCE_DIR}/thread.h ${CMAKE_CURRENT_SOURCE_DIR}/bio.c ${CMAKE_CURRENT_SOURCE_DIR}/bio.h ${CMAKE_CURRENT_SOURCE_DIR}/cio.c @@ -74,6 +76,11 @@ if(OPJ_DISABLE_TPSOT_FIX) add_definitions(-DOPJ_DISABLE_TPSOT_FIX) endif() +# Special case for old i586-mingw32msvc-gcc cross compiler +if(NOT WIN32 AND CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER MATCHES ".*mingw32msvc.*" ) + set(WIN32 YES) +endif() + # Build the library if(WIN32) if(BUILD_SHARED_LIBS) @@ -143,3 +150,36 @@ 
if(OPJ_USE_DSYMUTIL) DEPENDS ${OPENJPEG_LIBRARY_NAME}) endif() endif() + +################################################################################# +# threading configuration +################################################################################# +set(CMAKE_THREAD_PREFER_PTHREAD TRUE) + +option(USE_THREAD "Build with thread/mutex support " ON) +if(NOT USE_THREAD) + add_definitions( -DMUTEX_stub) +endif(NOT USE_THREAD) + +find_package(Threads QUIET) + +if(USE_THREAD AND WIN32 AND NOT Threads_FOUND ) + add_definitions( -DMUTEX_win32) + set(Threads_FOUND YES) +endif() + +if(USE_THREAD AND Threads_FOUND AND CMAKE_USE_WIN32_THREADS_INIT ) + add_definitions( -DMUTEX_win32) +endif(USE_THREAD AND Threads_FOUND AND CMAKE_USE_WIN32_THREADS_INIT ) + +if(USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT ) + add_definitions( -DMUTEX_pthread) +endif(USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT ) + +if(USE_THREAD AND NOT Threads_FOUND) + message(FATAL_ERROR "No thread library found and thread/mutex support is required by USE_THREAD option") +endif(USE_THREAD AND NOT Threads_FOUND) + +if(USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT}) +endif(USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) diff --git a/src/lib/openjp2/openjpeg.h b/src/lib/openjp2/openjpeg.h index c07e9c84..369693df 100644 --- a/src/lib/openjp2/openjpeg.h +++ b/src/lib/openjp2/openjpeg.h @@ -1554,6 +1554,19 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_set_MCT( opj_cparameters_t *parameters, OPJ_INT32 * p_dc_shift, OPJ_UINT32 pNbComp); +/* +========================================================== + Thread functions +========================================================== +*/ + +/** Returns if the library is built with thread support. + * OPJ_TRUE if mutex, condition, thread, thread pool are available. 
+ */ +OPJ_API OPJ_BOOL OPJ_CALLCONV opj_has_thread_support(void); + +/** Return the number of virtual CPUs */ +OPJ_API int OPJ_CALLCONV opj_get_num_cpus(void); #ifdef __cplusplus diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index 58a5a9a9..c2cc31fa 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -182,6 +182,8 @@ static INLINE long opj_lrintf(float f) { #include "bio.h" #include "cio.h" +#include "thread.h" + #include "image.h" #include "invert.h" #include "j2k.h" diff --git a/src/lib/openjp2/thread.c b/src/lib/openjp2/thread.c new file mode 100644 index 00000000..b2f8b5b2 --- /dev/null +++ b/src/lib/openjp2/thread.c @@ -0,0 +1,959 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2016, Even Rouault + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opj_includes.h" + +#include "thread.h" +#include <assert.h> + +#ifdef MUTEX_win32 + +/* Some versions of x86_64-w64-mingw32-gc -m32 resolve InterlockedCompareExchange() */ +/* as __sync_val_compare_and_swap_4 but fails to link it. As this protects against */ +/* a rather unlikely race, skip it */ +#if !(defined(__MINGW32__) && defined(__i386__)) +#define HAVE_INTERLOCKED_COMPARE_EXCHANGE 1 +#endif + +#include <windows.h> + +OPJ_BOOL OPJ_CALLCONV opj_has_thread_support(void) +{ + return OPJ_TRUE; +} + +int OPJ_CALLCONV opj_get_num_cpus(void) +{ + SYSTEM_INFO info; + DWORD dwNum; + GetSystemInfo(&info); + dwNum = info.dwNumberOfProcessors; + if( dwNum < 1 ) + return 1; + return (int)dwNum; +} + +struct opj_mutex_t +{ + CRITICAL_SECTION cs; +}; + +opj_mutex_t* opj_mutex_create(void) +{ + opj_mutex_t* mutex = (opj_mutex_t*) opj_malloc(sizeof(opj_mutex_t)); + if( !mutex ) + return NULL; + InitializeCriticalSectionAndSpinCount(&(mutex->cs), 4000); + return mutex; +} + +void opj_mutex_lock(opj_mutex_t* mutex) +{ + EnterCriticalSection( &(mutex->cs) ); +} + +void opj_mutex_unlock(opj_mutex_t* mutex) +{ + LeaveCriticalSection( &(mutex->cs) ); +} + +void opj_mutex_destroy(opj_mutex_t* mutex) +{ + if( !mutex ) return; + DeleteCriticalSection( &(mutex->cs) ); + opj_free( mutex ); +} + +struct opj_cond_waiter_list_t +{ + HANDLE hEvent; + struct opj_cond_waiter_list_t* next; +}; +typedef struct opj_cond_waiter_list_t opj_cond_waiter_list_t; + +struct 
opj_cond_t +{ + opj_mutex_t *internal_mutex; + opj_cond_waiter_list_t *waiter_list; +}; + +static DWORD TLSKey = 0; +static volatile LONG inTLSLockedSection = 0; +static volatile int TLSKeyInit = OPJ_FALSE; + +opj_cond_t* opj_cond_create(void) +{ + opj_cond_t* cond = (opj_cond_t*) opj_malloc(sizeof(opj_cond_t)); + if( !cond ) + return NULL; + + /* Make sure that the TLS key is allocated in a thread-safe way */ + /* We cannot use a global mutex/critical section since its creation itself would not be */ + /* thread-safe, so use InterlockedCompareExchange trick */ + while( OPJ_TRUE ) + { + +#if HAVE_INTERLOCKED_COMPARE_EXCHANGE + if( InterlockedCompareExchange(&inTLSLockedSection, 1, 0) == 0 ) +#endif + { + if( !TLSKeyInit ) + { + TLSKey = TlsAlloc(); + TLSKeyInit = OPJ_TRUE; + } +#if HAVE_INTERLOCKED_COMPARE_EXCHANGE + InterlockedCompareExchange(&inTLSLockedSection, 0, 1); +#endif + break; + } + } + + if( TLSKey == TLS_OUT_OF_INDEXES ) + { + opj_free(cond); + return NULL; + } + cond->internal_mutex = opj_mutex_create(); + if (cond->internal_mutex == NULL) + { + opj_free(cond); + return NULL; + } + cond->waiter_list = NULL; + return cond; +} + +void opj_cond_wait(opj_cond_t* cond, opj_mutex_t* mutex) +{ + opj_cond_waiter_list_t* item; + HANDLE hEvent = (HANDLE) TlsGetValue( TLSKey ); + if (hEvent == NULL) + { + hEvent = CreateEvent(NULL, /* security attributes */ + 0, /* manual reset = no */ + 0, /* initial state = unsignaled */ + NULL /* no name */); + assert(hEvent); + + TlsSetValue( TLSKey, hEvent ); + } + + /* Insert the waiter into the waiter list of the condition */ + opj_mutex_lock(cond->internal_mutex); + + item = (opj_cond_waiter_list_t*)opj_malloc(sizeof(opj_cond_waiter_list_t)); + assert(item != NULL); + + item->hEvent = hEvent; + item->next = cond->waiter_list; + + cond->waiter_list = item; + + opj_mutex_unlock(cond->internal_mutex); + + /* Release the client mutex before waiting for the event being signaled */ + opj_mutex_unlock(mutex); + + /* Ideally we 
would check that we do not get WAIT_FAILED but it is hard */ + /* to report a failure. */ + WaitForSingleObject(hEvent, INFINITE); + + /* Reacquire the client mutex */ + opj_mutex_lock(mutex); +} + +void opj_cond_signal(opj_cond_t* cond) +{ + opj_cond_waiter_list_t* psIter; + + /* Signal the first registered event, and remove it from the list */ + opj_mutex_lock(cond->internal_mutex); + + psIter = cond->waiter_list; + if (psIter != NULL) + { + SetEvent(psIter->hEvent); + cond->waiter_list = psIter->next; + opj_free(psIter); + } + + opj_mutex_unlock(cond->internal_mutex); +} + +void opj_cond_destroy(opj_cond_t* cond) +{ + if( !cond ) return; + opj_mutex_destroy(cond->internal_mutex); + assert(cond->waiter_list == NULL); + opj_free(cond); +} + +struct opj_thread_t +{ + opj_thread_fn thread_fn; + void* user_data; + HANDLE hThread; +}; + +static DWORD WINAPI opj_thread_callback_adapter( void *info ) +{ + opj_thread_t* thread = (opj_thread_t*) info; + HANDLE hEvent = NULL; + + thread->thread_fn( thread->user_data ); + + /* Free the handle possible allocated by a cond */ + while( OPJ_TRUE ) + { + /* Make sure TLSKey is not being created just at that moment... 
*/ +#if HAVE_INTERLOCKED_COMPARE_EXCHANGE + if( InterlockedCompareExchange(&inTLSLockedSection, 1, 0) == 0 ) +#endif + { + if( TLSKeyInit ) + { + hEvent = (HANDLE) TlsGetValue( TLSKey ); + } +#if HAVE_INTERLOCKED_COMPARE_EXCHANGE + InterlockedCompareExchange(&inTLSLockedSection, 0, 1); +#endif + break; + } + } + if( hEvent ) + CloseHandle(hEvent); + + return 0; +} + +opj_thread_t* opj_thread_create( opj_thread_fn thread_fn, void* user_data ) +{ + opj_thread_t* thread; + DWORD nThreadId = 0; + + assert( thread_fn ); + + thread = (opj_thread_t*) opj_malloc( sizeof(opj_thread_t) ); + if( !thread ) + return NULL; + thread->thread_fn = thread_fn; + thread->user_data = user_data; + + thread->hThread = CreateThread( NULL, 0, opj_thread_callback_adapter, thread, + 0, &nThreadId ); + + if( thread->hThread == NULL ) + { + opj_free( thread ); + return NULL; + } + return thread; +} + +void opj_thread_join( opj_thread_t* thread ) +{ + WaitForSingleObject(thread->hThread, INFINITE); + CloseHandle( thread->hThread ); + + opj_free(thread); +} + +#elif MUTEX_pthread + +#include <pthread.h> +#include <stdlib.h> +#include <unistd.h> + +OPJ_BOOL OPJ_CALLCONV opj_has_thread_support(void) +{ + return OPJ_TRUE; +} + +int OPJ_CALLCONV opj_get_num_cpus(void) +{ +#ifdef _SC_NPROCESSORS_ONLN + return (int)sysconf(_SC_NPROCESSORS_ONLN); +#else + return 1; +#endif +} + +struct opj_mutex_t +{ + pthread_mutex_t mutex; +}; + +opj_mutex_t* opj_mutex_create(void) +{ + opj_mutex_t* mutex = (opj_mutex_t*) opj_malloc(sizeof(opj_mutex_t)); + if( !mutex ) + return NULL; + pthread_mutex_t pthr_mutex = PTHREAD_MUTEX_INITIALIZER; + mutex->mutex = pthr_mutex; + return mutex; +} + +void opj_mutex_lock(opj_mutex_t* mutex) +{ + pthread_mutex_lock(&(mutex->mutex)); +} + +void opj_mutex_unlock(opj_mutex_t* mutex) +{ + pthread_mutex_unlock(&(mutex->mutex)); +} + +void opj_mutex_destroy(opj_mutex_t* mutex) +{ + if( !mutex ) return; + pthread_mutex_destroy(&(mutex->mutex)); + opj_free(mutex); +} + +struct opj_cond_t +{ + pthread_cond_t cond; +}; + 
+opj_cond_t* opj_cond_create(void) +{ + opj_cond_t* cond = (opj_cond_t*) opj_malloc(sizeof(opj_cond_t)); + if( !cond ) + return NULL; + if( pthread_cond_init(&(cond->cond), NULL) != 0 ) + { + opj_free(cond); + return NULL; + } + return cond; +} + +void opj_cond_wait(opj_cond_t* cond, opj_mutex_t* mutex) +{ + pthread_cond_wait(&(cond->cond), &(mutex->mutex)); +} + +void opj_cond_signal(opj_cond_t* cond) +{ + int ret = pthread_cond_signal(&(cond->cond)); + (void)ret; + assert(ret == 0); +} + +void opj_cond_destroy(opj_cond_t* cond) +{ + if( !cond ) return; + pthread_cond_destroy(&(cond->cond)); + opj_free(cond); +} + + +struct opj_thread_t +{ + opj_thread_fn thread_fn; + void* user_data; + pthread_t thread; +}; + +static void* opj_thread_callback_adapter( void* info ) +{ + opj_thread_t* thread = (opj_thread_t*) info; + thread->thread_fn( thread->user_data ); + return NULL; +} + +opj_thread_t* opj_thread_create( opj_thread_fn thread_fn, void* user_data ) +{ + pthread_attr_t attr; + opj_thread_t* thread; + + assert( thread_fn ); + + thread = (opj_thread_t*) opj_malloc( sizeof(opj_thread_t) ); + if( !thread ) + return NULL; + thread->thread_fn = thread_fn; + thread->user_data = user_data; + + pthread_attr_init( &attr ); + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ); + if( pthread_create( &(thread->thread), &attr, + opj_thread_callback_adapter, (void *) thread ) != 0 ) + { + opj_free( thread ); + return NULL; + } + return thread; +} + +void opj_thread_join( opj_thread_t* thread ) +{ + void* status; + pthread_join( thread->thread, &status); + + opj_free(thread); +} + +#else +/* Stub implementation */ + +OPJ_BOOL OPJ_CALLCONV opj_has_thread_support(void) +{ + return OPJ_FALSE; +} + +int OPJ_CALLCONV opj_get_num_cpus(void) +{ + return 1; +} + +opj_mutex_t* opj_mutex_create(void) +{ + return NULL; +} + +void opj_mutex_lock(opj_mutex_t* mutex) +{ + (void) mutex; +} + +void opj_mutex_unlock(opj_mutex_t* mutex) +{ + (void) mutex; +} + +void 
opj_mutex_destroy(opj_mutex_t* mutex) +{ + (void) mutex; +} + +opj_cond_t* opj_cond_create(void) +{ + return NULL; +} + +void opj_cond_wait(opj_cond_t* cond, opj_mutex_t* mutex) +{ + (void) cond; + (void) mutex; +} + +void opj_cond_signal(opj_cond_t* cond) +{ + (void) cond; +} + +void opj_cond_destroy(opj_cond_t* cond) +{ + (void) cond; +} + +opj_thread_t* opj_thread_create( opj_thread_fn thread_fn, void* user_data ) +{ + (void) thread_fn; + (void) user_data; + return NULL; +} + +void opj_thread_join( opj_thread_t* thread ) +{ + (void) thread; +} + +#endif + +typedef struct +{ + int key; + void* value; + opj_tls_free_func opj_free_func; +} opj_tls_key_val_t; + +struct opj_tls_t +{ + opj_tls_key_val_t* key_val; + int key_val_count; +}; + +static opj_tls_t* opj_tls_new(void) +{ + return (opj_tls_t*) opj_calloc(1, sizeof(opj_tls_t)); +} + +static void opj_tls_destroy(opj_tls_t* tls) +{ + int i; + if( !tls ) return; + for(i=0;i<tls->key_val_count;i++) + { + if( tls->key_val[i].opj_free_func ) + tls->key_val[i].opj_free_func(tls->key_val[i].value); + } + opj_free(tls->key_val); + opj_free(tls); +} + +void* opj_tls_get(opj_tls_t* tls, int key) +{ + int i; + for(i=0;i<tls->key_val_count;i++) + { + if( tls->key_val[i].key == key ) + return tls->key_val[i].value; + } + return NULL; +} + +OPJ_BOOL opj_tls_set(opj_tls_t* tls, int key, void* value, opj_tls_free_func opj_free_func) +{ + opj_tls_key_val_t* new_key_val; + int i; + for(i=0;i<tls->key_val_count;i++) + { + if( tls->key_val[i].key == key ) + { + if( tls->key_val[i].opj_free_func ) + tls->key_val[i].opj_free_func(tls->key_val[i].value); + tls->key_val[i].value = value; + tls->key_val[i].opj_free_func = opj_free_func; + return OPJ_TRUE; + } + } + new_key_val = (opj_tls_key_val_t*) opj_realloc( tls->key_val, + (tls->key_val_count + 1) * sizeof(opj_tls_key_val_t) ); + if( !new_key_val ) + return OPJ_FALSE; + tls->key_val = new_key_val; + new_key_val[tls->key_val_count].key = key; + new_key_val[tls->key_val_count].value = value; + 
new_key_val[tls->key_val_count].opj_free_func = opj_free_func; + tls->key_val_count ++; + return OPJ_TRUE; +} + + +typedef struct +{ + opj_job_fn job_fn; + void *user_data; +} opj_worker_thread_job_t; + +typedef struct +{ + opj_thread_pool_t *tp; + opj_thread_t *thread; + int marked_as_waiting; + + opj_mutex_t *mutex; + opj_cond_t *cond; +} opj_worker_thread_t; + +typedef enum +{ + OPJWTS_OK, + OPJWTS_STOP, + OPJWTS_ERROR +} opj_worker_thread_state; + +struct opj_job_list_t +{ + opj_worker_thread_job_t* job; + struct opj_job_list_t* next; +}; +typedef struct opj_job_list_t opj_job_list_t; + +struct opj_worker_thread_list_t +{ + opj_worker_thread_t* worker_thread; + struct opj_worker_thread_list_t* next; +}; +typedef struct opj_worker_thread_list_t opj_worker_thread_list_t; + +struct opj_thread_pool_t +{ + opj_worker_thread_t* worker_threads; + int worker_threads_count; + opj_cond_t* cond; + opj_mutex_t* mutex; + volatile opj_worker_thread_state state; + opj_job_list_t* job_queue; + volatile int pending_jobs_count; + opj_worker_thread_list_t* waiting_worker_thread_list; + int waiting_worker_thread_count; + opj_tls_t* tls; + int signaling_threshold; +}; + +static OPJ_BOOL opj_thread_pool_setup(opj_thread_pool_t* tp, int num_threads); +static opj_worker_thread_job_t* opj_thread_pool_get_next_job(opj_thread_pool_t* tp, + opj_worker_thread_t* worker_thread, + OPJ_BOOL signal_job_finished); + +opj_thread_pool_t* opj_thread_pool_create(int num_threads) +{ + opj_thread_pool_t* tp; + + tp = (opj_thread_pool_t*) opj_calloc(1, sizeof(opj_thread_pool_t)); + if( !tp ) + return NULL; + tp->state = OPJWTS_OK; + + if( num_threads <= 0 ) + { + tp->tls = opj_tls_new(); + if( !tp->tls ) + { + opj_free(tp); + tp = NULL; + } + return tp; + } + + tp->mutex = opj_mutex_create(); + if( !tp->mutex ) + { + opj_free(tp); + return NULL; + } + if( !opj_thread_pool_setup(tp, num_threads) ) + { + opj_thread_pool_destroy(tp); + return NULL; + } + return tp; +} + +static void 
opj_worker_thread_function(void* user_data) +{ + opj_worker_thread_t* worker_thread; + opj_thread_pool_t* tp; + opj_tls_t* tls; + OPJ_BOOL job_finished = OPJ_FALSE; + + worker_thread = (opj_worker_thread_t* ) user_data; + tp = worker_thread->tp; + tls = opj_tls_new(); + + while( OPJ_TRUE ) + { + opj_worker_thread_job_t* job = opj_thread_pool_get_next_job(tp, worker_thread, job_finished); + if( job == NULL ) + break; + + if( job->job_fn ) + { + job->job_fn(job->user_data, tls); + } + opj_free(job); + job_finished = OPJ_TRUE; + } + + opj_tls_destroy(tls); +} + +static OPJ_BOOL opj_thread_pool_setup(opj_thread_pool_t* tp, int num_threads) +{ + int i; + OPJ_BOOL bRet = OPJ_TRUE; + + assert( num_threads > 0 ); + + tp->cond = opj_cond_create(); + if( tp->cond == NULL ) + return OPJ_FALSE; + + tp->worker_threads = (opj_worker_thread_t*) opj_calloc( num_threads, + sizeof(opj_worker_thread_t) ); + if( tp->worker_threads == NULL ) + return OPJ_FALSE; + tp->worker_threads_count = num_threads; + + for(i=0;i<num_threads;i++) + { + tp->worker_threads[i].tp = tp; + + tp->worker_threads[i].mutex = opj_mutex_create(); + if( tp->worker_threads[i].mutex == NULL ) + { + tp->worker_threads_count = i; + bRet = OPJ_FALSE; + break; + } + + tp->worker_threads[i].cond = opj_cond_create(); + if( tp->worker_threads[i].cond == NULL ) + { + opj_mutex_destroy(tp->worker_threads[i].mutex); + tp->worker_threads_count = i; + bRet = OPJ_FALSE; + break; + } + + tp->worker_threads[i].marked_as_waiting = OPJ_FALSE; + + tp->worker_threads[i].thread = opj_thread_create(opj_worker_thread_function, + &(tp->worker_threads[i])); + if( tp->worker_threads[i].thread == NULL ) + { + tp->worker_threads_count = i; + bRet = OPJ_FALSE; + break; + } + } + + /* Wait all threads to be started */ + /* printf("waiting for all threads to be started\n"); */ + opj_mutex_lock(tp->mutex); + while( tp->waiting_worker_thread_count < num_threads ) + { + opj_cond_wait(tp->cond, tp->mutex); + } + opj_mutex_unlock(tp->mutex); + /* printf("all threads 
started\n"); */ + + if( tp->state == OPJWTS_ERROR ) + bRet = OPJ_FALSE; + + return bRet; +} + +/* +void opj_waiting() +{ + printf("waiting!\n"); +} +*/ + +static opj_worker_thread_job_t* opj_thread_pool_get_next_job(opj_thread_pool_t* tp, + opj_worker_thread_t* worker_thread, + OPJ_BOOL signal_job_finished) +{ + while( OPJ_TRUE ) + { + opj_job_list_t* top_job_iter; + + opj_mutex_lock(tp->mutex); + + if( signal_job_finished ) + { + signal_job_finished = OPJ_FALSE; + tp->pending_jobs_count --; + /*printf("tp=%p, remaining jobs: %d\n", tp, tp->pending_jobs_count);*/ + if( tp->pending_jobs_count <= tp->signaling_threshold ) + opj_cond_signal(tp->cond); + } + + if( tp->state == OPJWTS_STOP ) + { + opj_mutex_unlock(tp->mutex); + return NULL; + } + top_job_iter = tp->job_queue; + if( top_job_iter ) + { + opj_worker_thread_job_t* job; + tp->job_queue = top_job_iter->next; + + job = top_job_iter->job; + opj_mutex_unlock(tp->mutex); + opj_free(top_job_iter); + return job; + } + + /* opj_waiting(); */ + if( !worker_thread->marked_as_waiting ) + { + opj_worker_thread_list_t* item; + + worker_thread->marked_as_waiting = OPJ_TRUE; + tp->waiting_worker_thread_count ++; + assert(tp->waiting_worker_thread_count <= tp->worker_threads_count); + + item= (opj_worker_thread_list_t*) opj_malloc(sizeof(opj_worker_thread_list_t)); + if( item == NULL ) + { + tp->state = OPJWTS_ERROR; + opj_cond_signal(tp->cond); + + opj_mutex_unlock(tp->mutex); + return NULL; + } + + item->worker_thread = worker_thread; + item->next = tp->waiting_worker_thread_list; + tp->waiting_worker_thread_list = item; + } + + /* printf("signaling that worker thread is ready\n"); */ + opj_cond_signal(tp->cond); + + opj_mutex_lock(worker_thread->mutex); + opj_mutex_unlock(tp->mutex); + + /* printf("waiting for job\n"); */ + opj_cond_wait( worker_thread->cond, worker_thread->mutex ); + + opj_mutex_unlock(worker_thread->mutex); + /* printf("got job\n"); */ + } +} + +OPJ_BOOL opj_thread_pool_submit_job(opj_thread_pool_t* 
tp, + opj_job_fn job_fn, + void* user_data) +{ + opj_worker_thread_job_t* job; + opj_job_list_t* item; + + if( tp->mutex == NULL ) + { + job_fn( user_data, tp->tls ); + return OPJ_TRUE; + } + + job = (opj_worker_thread_job_t*)opj_malloc(sizeof(opj_worker_thread_job_t)); + if( job == NULL ) + return OPJ_FALSE; + job->job_fn = job_fn; + job->user_data = user_data; + + item = (opj_job_list_t*) opj_malloc(sizeof(opj_job_list_t)); + if( item == NULL ) + { + opj_free(job); + return OPJ_FALSE; + } + item->job = job; + + opj_mutex_lock(tp->mutex); + + tp->signaling_threshold = 100 * tp->worker_threads_count; + while( tp->pending_jobs_count > tp->signaling_threshold ) + { + /* printf("%d jobs enqueued. Waiting\n", tp->pending_jobs_count); */ + opj_cond_wait(tp->cond, tp->mutex); + /* printf("...%d jobs enqueued.\n", tp->pending_jobs_count); */ + } + + item->next = tp->job_queue; + tp->job_queue = item; + tp->pending_jobs_count ++; + + if( tp->waiting_worker_thread_list ) + { + opj_worker_thread_t* worker_thread; + opj_worker_thread_list_t* next; + opj_worker_thread_list_t* to_opj_free; + + worker_thread = tp->waiting_worker_thread_list->worker_thread; + + assert( worker_thread->marked_as_waiting ); + worker_thread->marked_as_waiting = OPJ_FALSE; + + next = tp->waiting_worker_thread_list->next; + to_opj_free = tp->waiting_worker_thread_list; + tp->waiting_worker_thread_list = next; + tp->waiting_worker_thread_count --; + + opj_mutex_lock(worker_thread->mutex); + opj_mutex_unlock(tp->mutex); + opj_cond_signal(worker_thread->cond); + opj_mutex_unlock(worker_thread->mutex); + + opj_free(to_opj_free); + } + else + opj_mutex_unlock(tp->mutex); + + return OPJ_TRUE; +} + +void opj_thread_pool_wait_completion(opj_thread_pool_t* tp, int max_remaining_jobs) +{ + if( tp->mutex == NULL ) + { + return; + } + + if( max_remaining_jobs < 0 ) + max_remaining_jobs = 0; + opj_mutex_lock(tp->mutex); + tp->signaling_threshold = max_remaining_jobs; + while( tp->pending_jobs_count > 
max_remaining_jobs ) + { + /*printf("tp=%p, jobs before wait = %d, max_remaining_jobs = %d\n", tp, tp->pending_jobs_count, max_remaining_jobs);*/ + opj_cond_wait(tp->cond, tp->mutex); + /*printf("tp=%p, jobs after wait = %d\n", tp, tp->pending_jobs_count);*/ + } + opj_mutex_unlock(tp->mutex); +} + +int opj_thread_pool_get_thread_count(opj_thread_pool_t* tp) +{ + return tp->worker_threads_count; +} + +void opj_thread_pool_destroy(opj_thread_pool_t* tp) +{ + if( !tp ) return; + if( tp->cond ) + { + int i; + opj_thread_pool_wait_completion(tp, 0); + + tp->state = OPJWTS_STOP; /* opj_thread_pool_get_next_job() returns NULL once it sees this state */ + + for(i=0;i<tp->worker_threads_count;i++) + { + opj_mutex_lock(tp->worker_threads[i].mutex); + opj_cond_signal(tp->worker_threads[i].cond); /* wake the worker so it can observe the stop state */ + opj_mutex_unlock(tp->worker_threads[i].mutex); + opj_thread_join(tp->worker_threads[i].thread); + opj_cond_destroy(tp->worker_threads[i].cond); + opj_mutex_destroy(tp->worker_threads[i].mutex); + } + + opj_free(tp->worker_threads); + + while( tp->waiting_worker_thread_list != NULL ) + { + opj_worker_thread_list_t* next = tp->waiting_worker_thread_list->next; + opj_free( tp->waiting_worker_thread_list ); + tp->waiting_worker_thread_list = next; + } + + opj_cond_destroy(tp->cond); + } + opj_mutex_destroy(tp->mutex); + opj_tls_destroy(tp->tls); + opj_free(tp); +} diff --git a/src/lib/openjp2/thread.h b/src/lib/openjp2/thread.h new file mode 100644 index 00000000..241e6d88 --- /dev/null +++ b/src/lib/openjp2/thread.h @@ -0,0 +1,253 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2016, Even Rouault + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef THREAD_H +#define THREAD_H + +#include "openjpeg.h" + +/** +@file thread.h +@brief Thread API + +The functions in thread.c have for goal to manage mutex, conditions, thread +creation and thread pools that accept jobs. +*/ + +/** @defgroup THREAD THREAD - Mutex, conditions, threads and thread pools */ +/*@{*/ + +/** @name Mutex */ +/*@{*/ + +/** Opaque type for a mutex */ +typedef struct opj_mutex_t opj_mutex_t; + +/** Creates a mutex. + * @return the mutex or NULL in case of error (can for example happen if the library + * is built without thread support) + */ +opj_mutex_t* opj_mutex_create(void); + +/** Lock/acquire the mutex. + * @param mutex the mutex to acquire. + */ +void opj_mutex_lock(opj_mutex_t* mutex); + +/** Unlock/release the mutex. + * @param mutex the mutex to release. 
+ */ +void opj_mutex_unlock(opj_mutex_t* mutex); + +/** Destroy a mutex + * @param mutex the mutex to destroy. + */ +void opj_mutex_destroy(opj_mutex_t* mutex); + +/*@}*/ + +/** @name Condition */ +/*@{*/ + +/** Opaque type for a condition */ +typedef struct opj_cond_t opj_cond_t; + +/** Creates a condition. + * @return the condition or NULL in case of error (can for example happen if the library + * is built without thread support) + */ +opj_cond_t* opj_cond_create(void); + +/** Wait for the condition to be signaled. + * The semantics are the same as those of the POSIX pthread_cond_wait(). + * The provided mutex *must* be acquired before calling this function, and + * released afterwards. + * The mutex will be released by this function while it must wait for the condition + * and reacquired afterwards. + * In some particular situations, the function might return even if the condition is not signaled + * with opj_cond_signal(), hence the need to check with an application level + * mechanism. + * + * Waiting thread : + * \code + * opj_mutex_lock(mutex); + * while( !some_application_level_condition ) + * { + * opj_cond_wait(cond, mutex); + * } + * opj_mutex_unlock(mutex); + * \endcode + * + * Signaling thread : + * \code + * opj_mutex_lock(mutex); + * some_application_level_condition = TRUE; + * opj_cond_signal(cond); + * opj_mutex_unlock(mutex); + * \endcode + * + * @param cond the condition to wait on. + * @param mutex the mutex (in acquired state before calling this function) + */ +void opj_cond_wait(opj_cond_t* cond, opj_mutex_t* mutex); + +/** Signal waiting threads on a condition. + * One of the threads waiting with opj_cond_wait() will be woken up. + * It is strongly advised that this call is done with the mutex that is used + * by opj_cond_wait(), in an acquired state. + * @param cond the condition to signal. + */ +void opj_cond_signal(opj_cond_t* cond); + +/** Destroy a condition + * @param cond the condition to destroy.
+ */ +void opj_cond_destroy(opj_cond_t* cond); + +/*@}*/ + +/** @name Thread */ +/*@{*/ + +/** Opaque type for a thread handle */ +typedef struct opj_thread_t opj_thread_t; + +/** User function to execute in a thread + * @param user_data user data provided with opj_thread_create() + */ +typedef void (*opj_thread_fn)(void* user_data); + +/** Creates a new thread. + * @param thread_fn Function to run in the new thread. + * @param user_data user data provided to the thread function. Might be NULL. + * @return a thread handle or NULL in case of failure (can for example happen if the library + * is built without thread support) + */ +opj_thread_t* opj_thread_create( opj_thread_fn thread_fn, void* user_data ); + +/** Wait for a thread to be finished and release the resources associated with the + * thread handle. + * @param thread the thread to wait for. + */ +void opj_thread_join( opj_thread_t* thread ); + +/*@}*/ + +/** @name Thread local storage */ +/*@{*/ +/** Opaque type for a thread local storage */ +typedef struct opj_tls_t opj_tls_t; + +/** Get a thread local value corresponding to the provided key. + * @param tls thread local storage handle + * @param key key whose value to retrieve. + * @return value associated with the key, or NULL if missing. + */ +void* opj_tls_get(opj_tls_t* tls, int key); + +/** Type of the function used to free a TLS value */ +typedef void (*opj_tls_free_func)(void* value); + +/** Set a thread local value corresponding to the provided key. + * @param tls thread local storage handle + * @param key key whose value to set. + * @param value value to set (may be NULL). + * @param free_func function used to free the installed value. + * @return OPJ_TRUE if successful. + */ +OPJ_BOOL opj_tls_set(opj_tls_t* tls, int key, void* value, opj_tls_free_func free_func); + +/*@}*/ + +/** @name Thread pool */ +/*@{*/ + +/** Opaque type for a thread pool */ +typedef struct opj_thread_pool_t opj_thread_pool_t; + +/** Create a new thread pool.
+ * num_threads must nominally be >= 1 to create a real thread pool. If num_threads + * is negative or zero, then a dummy thread pool will be created. All functions + * operating on the thread pool will work, but job submission will be run + * synchronously in the calling thread. + * + * @param num_threads the number of threads to allocate for this thread pool. + * @return a thread pool handle, or NULL in case of failure (can for example happen if the library + * is built without thread support) + */ +opj_thread_pool_t* opj_thread_pool_create(int num_threads); + +/** User function to execute in a thread + * @param user_data user data provided with opj_thread_pool_submit_job() + * @param tls handle to thread local storage + */ +typedef void (*opj_job_fn)(void* user_data, opj_tls_t* tls); + + +/** Submit a new job to be run by one of the threads in the thread pool. + * The job ( job_fn, user_data ) will be added to the queue of jobs managed + * by the thread pool, and run by the first thread that is no longer busy. + * + * @param tp the thread pool handle. + * @param job_fn Function to run. Must not be NULL. + * @param user_data User data provided to job_fn. + * @return OPJ_TRUE if the job was successfully submitted. + */ +OPJ_BOOL opj_thread_pool_submit_job(opj_thread_pool_t* tp, opj_job_fn job_fn, void* user_data); + +/** Wait until no more than max_remaining_jobs jobs remain in the queue of + * the thread pool. The aim of this function is to avoid submitting too many + * jobs while the thread pool cannot cope fast enough with them, which would + * result potentially in out-of-memory situations with too many job descriptions + * being queued. + * + * @param tp the thread pool handle + * @param max_remaining_jobs maximum number of jobs allowed to be queued without waiting. + */ +void opj_thread_pool_wait_completion(opj_thread_pool_t* tp, int max_remaining_jobs); + +/** Return the number of threads associated with the thread pool.
+ * + * @param tp the thread pool handle. + * @return number of threads associated with the thread pool. + */ +int opj_thread_pool_get_thread_count(opj_thread_pool_t* tp); + +/** Destroy a thread pool. + * @param tp the thread pool handle. + */ +void opj_thread_pool_destroy(opj_thread_pool_t* tp); + +/*@}*/ + +/*@}*/ + +#endif /* THREAD_H */ From d4b7f03cfa4732132767188782683f3d957da912 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 25 May 2016 16:36:47 +0200 Subject: [PATCH 14/33] Add opj_codec_set_threads() in public API and propagate resulting thread pool to tcd level By default, only the main thread is used. If opj_codec_set_threads() is not used, but the OPJ_NUM_THREADS environment variable is set, its value will be used to initialize the number of threads. The value can be either an integer number, or "ALL_CPUS". If OPJ_NUM_THREADS is set and this function is called, this function will override the behaviour of the environment variable. --- src/lib/openjp2/j2k.c | 55 +++++++++++++++++++++++++++++++++++-- src/lib/openjp2/j2k.h | 8 ++++++ src/lib/openjp2/jp2.c | 5 ++++ src/lib/openjp2/jp2.h | 2 ++ src/lib/openjp2/openjpeg.c | 18 ++++++++++++ src/lib/openjp2/openjpeg.h | 19 +++++++++++++ src/lib/openjp2/opj_codec.h | 4 +++ src/lib/openjp2/tcd.c | 4 ++- src/lib/openjp2/tcd.h | 6 +++- 9 files changed, 117 insertions(+), 4 deletions(-) diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 9eaa155e..68b2f82e 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -5944,6 +5944,32 @@ void opj_j2k_setup_decoder(opj_j2k_t *j2k, opj_dparameters_t *parameters) } } +OPJ_BOOL opj_j2k_set_threads(opj_j2k_t *j2k, OPJ_UINT32 num_threads) +{ + if( opj_has_thread_support() ) + { + opj_thread_pool_destroy(j2k->m_tp); + j2k->m_tp = opj_thread_pool_create((int)num_threads); + if( j2k->m_tp == 0 ) + { + j2k->m_tp = opj_thread_pool_create(0); + return OPJ_FALSE; + } + return OPJ_TRUE; + } + return OPJ_FALSE; +} + +static int 
opj_j2k_get_default_thread_count() +{ + const char* num_threads = getenv("OPJ_NUM_THREADS"); + if( num_threads == NULL || !opj_has_thread_support() ) + return 0; + if( strcmp(num_threads, "ALL_CPUS") == 0 ) + return opj_get_num_cpus(); + return atoi(num_threads); +} + /* ----------------------------------------------------------------------- */ /* J2K encoder interface */ /* ----------------------------------------------------------------------- */ @@ -5981,6 +6007,17 @@ opj_j2k_t* opj_j2k_create_compress(void) return NULL; } + l_j2k->m_tp = opj_thread_pool_create(opj_j2k_get_default_thread_count()); + if( !l_j2k->m_tp ) + { + l_j2k->m_tp = opj_thread_pool_create(0); + } + if( !l_j2k->m_tp ) + { + opj_j2k_destroy(l_j2k); + return NULL; + } + return l_j2k; } @@ -7486,7 +7523,7 @@ static OPJ_BOOL opj_j2k_copy_default_tcp_and_create_tcd ( opj_j2k_t * p_j2 return OPJ_FALSE; } - if ( !opj_tcd_init(p_j2k->m_tcd, l_image, &(p_j2k->m_cp)) ) { + if ( !opj_tcd_init(p_j2k->m_tcd, l_image, &(p_j2k->m_cp), p_j2k->m_tp) ) { opj_tcd_destroy(p_j2k->m_tcd); p_j2k->m_tcd = 00; opj_event_msg(p_manager, EVT_ERROR, "Cannot decode tile, memory error\n"); @@ -7567,6 +7604,9 @@ void opj_j2k_destroy (opj_j2k_t *p_j2k) opj_image_destroy(p_j2k->m_output_image); p_j2k->m_output_image = NULL; + opj_thread_pool_destroy(p_j2k->m_tp); + p_j2k->m_tp = NULL; + opj_free(p_j2k); } @@ -8658,6 +8698,17 @@ opj_j2k_t* opj_j2k_create_decompress(void) return 00; } + l_j2k->m_tp = opj_thread_pool_create(opj_j2k_get_default_thread_count()); + if( !l_j2k->m_tp ) + { + l_j2k->m_tp = opj_thread_pool_create(0); + } + if( !l_j2k->m_tp ) + { + opj_j2k_destroy(l_j2k); + return NULL; + } + return l_j2k; } @@ -10934,7 +10985,7 @@ static OPJ_BOOL opj_j2k_create_tcd( opj_j2k_t *p_j2k, return OPJ_FALSE; } - if (!opj_tcd_init(p_j2k->m_tcd,p_j2k->m_private_image,&p_j2k->m_cp)) { + if (!opj_tcd_init(p_j2k->m_tcd,p_j2k->m_private_image,&p_j2k->m_cp, p_j2k->m_tp)) { opj_tcd_destroy(p_j2k->m_tcd); p_j2k->m_tcd = 00; return 
OPJ_FALSE; diff --git a/src/lib/openjp2/j2k.h b/src/lib/openjp2/j2k.h index 358e0739..be85d5d9 100644 --- a/src/lib/openjp2/j2k.h +++ b/src/lib/openjp2/j2k.h @@ -589,6 +589,12 @@ typedef struct opj_j2k /** the current tile coder/decoder **/ struct opj_tcd * m_tcd; + + /** Number of threads to use */ + int m_num_threads; + + /** Thread pool */ + opj_thread_pool_t* m_tp; } opj_j2k_t; @@ -607,6 +613,8 @@ Decoding parameters are returned in j2k->cp. */ void opj_j2k_setup_decoder(opj_j2k_t *j2k, opj_dparameters_t *parameters); +OPJ_BOOL opj_j2k_set_threads(opj_j2k_t *j2k, OPJ_UINT32 num_threads); + /** * Creates a J2K compression structure * diff --git a/src/lib/openjp2/jp2.c b/src/lib/openjp2/jp2.c index a607c8a9..e156ebfc 100644 --- a/src/lib/openjp2/jp2.c +++ b/src/lib/openjp2/jp2.c @@ -1767,6 +1767,11 @@ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters) jp2->ignore_pclr_cmap_cdef = parameters->flags & OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG; } +OPJ_BOOL opj_jp2_set_threads(opj_jp2_t *jp2, OPJ_UINT32 num_threads) +{ + return opj_j2k_set_threads(jp2->j2k, num_threads); +} + /* ----------------------------------------------------------------------- */ /* JP2 encoder interface */ /* ----------------------------------------------------------------------- */ diff --git a/src/lib/openjp2/jp2.h b/src/lib/openjp2/jp2.h index 94138832..b54d0bfd 100644 --- a/src/lib/openjp2/jp2.h +++ b/src/lib/openjp2/jp2.h @@ -243,6 +243,8 @@ Decoding parameters are returned in jp2->j2k->cp. 
*/ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters); +OPJ_BOOL opj_jp2_set_threads(opj_jp2_t *jp2, OPJ_UINT32 num_threads); + /** * Decode an image from a JPEG-2000 file stream * @param jp2 JP2 decompressor handle diff --git a/src/lib/openjp2/openjpeg.c b/src/lib/openjp2/openjpeg.c index 5114cc10..ee3e14b6 100644 --- a/src/lib/openjp2/openjpeg.c +++ b/src/lib/openjp2/openjpeg.c @@ -239,6 +239,9 @@ opj_codec_t* OPJ_CALLCONV opj_create_decompress(OPJ_CODEC_FORMAT p_format) OPJ_UINT32 res_factor, struct opj_event_mgr * p_manager)) opj_j2k_set_decoded_resolution_factor; + l_codec->opj_set_threads = + (OPJ_BOOL (*) ( void * p_codec, OPJ_UINT32 num_threads )) opj_j2k_set_threads; + l_codec->m_codec = opj_j2k_create_decompress(); if (! l_codec->m_codec) { @@ -315,6 +318,9 @@ opj_codec_t* OPJ_CALLCONV opj_create_decompress(OPJ_CODEC_FORMAT p_format) OPJ_UINT32 res_factor, opj_event_mgr_t * p_manager)) opj_jp2_set_decoded_resolution_factor; + l_codec->opj_set_threads = + (OPJ_BOOL (*) ( void * p_codec, OPJ_UINT32 num_threads )) opj_jp2_set_threads; + l_codec->m_codec = opj_jp2_create(OPJ_TRUE); if (! 
l_codec->m_codec) { @@ -354,6 +360,18 @@ void OPJ_CALLCONV opj_set_default_decoder_parameters(opj_dparameters_t *paramete } } + +OPJ_BOOL OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec, + int num_threads) +{ + if (p_codec ) { + opj_codec_private_t * l_codec = (opj_codec_private_t *) p_codec; + + return l_codec->opj_set_threads ? l_codec->opj_set_threads(l_codec->m_codec, (OPJ_UINT32)num_threads) : OPJ_FALSE; /* handler is installed by opj_create_decompress(); NOTE(review): presumably NULL for compressor codecs - confirm */ + } + return OPJ_FALSE; +} + OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec, opj_dparameters_t *parameters ) diff --git a/src/lib/openjp2/openjpeg.h b/src/lib/openjp2/openjpeg.h index 369693df..7912c236 100644 --- a/src/lib/openjp2/openjpeg.h +++ b/src/lib/openjp2/openjpeg.h @@ -1262,6 +1262,25 @@ OPJ_API void OPJ_CALLCONV opj_set_default_decoder_parameters(opj_dparameters_t * OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec, opj_dparameters_t *parameters ); +/** + * Allocates worker threads for the compressor/decompressor. + * + * By default, only the main thread is used. If this function is not used, + * but the OPJ_NUM_THREADS environment variable is set, its value will be + * used to initialize the number of threads. The value can be either an integer + * number, or "ALL_CPUS". If OPJ_NUM_THREADS is set and this function is called, + * this function will override the behaviour of the environment variable. + * + * Note: currently only has effect on the decompressor. + * + * @param p_codec decompressor handler + * @param num_threads number of threads. + * + * @return OPJ_TRUE if the decoder is correctly set + */ +OPJ_API OPJ_BOOL OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec, + int num_threads); + /** * Decodes an image header.
* diff --git a/src/lib/openjp2/opj_codec.h b/src/lib/openjp2/opj_codec.h index 6bd791fa..c88005d7 100644 --- a/src/lib/openjp2/opj_codec.h +++ b/src/lib/openjp2/opj_codec.h @@ -113,6 +113,7 @@ typedef struct opj_codec_private OPJ_BOOL (*opj_set_decoded_resolution_factor) ( void * p_codec, OPJ_UINT32 res_factor, opj_event_mgr_t * p_manager); + } m_decompression; /** @@ -157,6 +158,9 @@ typedef struct opj_codec_private void (*opj_dump_codec) (void * p_codec, OPJ_INT32 info_flag, FILE* output_stream); opj_codestream_info_v2_t* (*opj_get_codec_info)(void* p_codec); opj_codestream_index_t* (*opj_get_codec_index)(void* p_codec); + + /** Set number of threads */ + OPJ_BOOL (*opj_set_threads) ( void * p_codec, OPJ_UINT32 num_threads ); } opj_codec_private_t; diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index b8cd3072..d76a3f9d 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -580,7 +580,8 @@ OPJ_BOOL opj_tcd_rateallocate( opj_tcd_t *tcd, OPJ_BOOL opj_tcd_init( opj_tcd_t *p_tcd, opj_image_t * p_image, - opj_cp_t * p_cp ) + opj_cp_t * p_cp, + opj_thread_pool_t* p_tp ) { p_tcd->image = p_image; p_tcd->cp = p_cp; @@ -597,6 +598,7 @@ OPJ_BOOL opj_tcd_init( opj_tcd_t *p_tcd, p_tcd->tcd_image->tiles->numcomps = p_image->numcomps; p_tcd->tp_pos = p_cp->m_specific_param.m_enc.m_tp_pos; + p_tcd->thread_pool = p_tp; return OPJ_TRUE; } diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index 07f8379a..77817bf6 100644 --- a/src/lib/openjp2/tcd.h +++ b/src/lib/openjp2/tcd.h @@ -220,6 +220,8 @@ typedef struct opj_tcd OPJ_UINT32 tcd_tileno; /** tell if the tcd is a decoder. */ OPJ_UINT32 m_is_decoder : 1; + /** Thread pool */ + opj_thread_pool_t* thread_pool; } opj_tcd_t; /** @name Exported functions */ @@ -249,12 +251,14 @@ void opj_tcd_destroy(opj_tcd_t *tcd); * @param p_tcd TCD handle. * @param p_image raw image. * @param p_cp coding parameters. + * @param p_tp thread pool * * @return true if the encoding values could be set (false otherwise). 
*/ OPJ_BOOL opj_tcd_init( opj_tcd_t *p_tcd, opj_image_t * p_image, - opj_cp_t * p_cp ); + opj_cp_t * p_cp, + opj_thread_pool_t* p_tp); /** * Allocates memory for decoding a specific tile. From 5fbb8b2645a085391b070162d8551aa960caab6a Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 25 May 2016 16:38:44 +0200 Subject: [PATCH 15/33] Use thread-pool for T1 decoding --- src/lib/openjp2/opj_includes.h | 1 + src/lib/openjp2/t1.c | 224 +++++++++++++++++++++------------ src/lib/openjp2/t1.h | 3 +- src/lib/openjp2/tcd.c | 20 +-- src/lib/openjp2/tls_keys.h | 37 ++++++ 5 files changed, 191 insertions(+), 94 deletions(-) create mode 100644 src/lib/openjp2/tls_keys.h diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index c2cc31fa..e835fae4 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -183,6 +183,7 @@ static INLINE long opj_lrintf(float f) { #include "cio.h" #include "thread.h" +#include "tls_keys.h" #include "image.h" #include "invert.h" diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index e1097bf5..adf64bb4 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1540,13 +1540,140 @@ void opj_t1_destroy(opj_t1_t *p_t1) opj_free(p_t1); } -OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1, - opj_tcd_tilecomp_t* tilec, - opj_tccp_t* tccp - ) +typedef struct +{ + OPJ_UINT32 resno; + opj_tcd_cblk_dec_t* cblk; + opj_tcd_band_t* band; + opj_tcd_tilecomp_t* tilec; + opj_tccp_t* tccp; + volatile OPJ_BOOL* pret; +} opj_t1_cblk_decode_processing_job_t; + +static void opj_t1_destroy_wrapper(void* t1) +{ + opj_t1_destroy( (opj_t1_t*) t1 ); +} + +static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) +{ + opj_tcd_cblk_dec_t* cblk; + opj_tcd_band_t* band; + opj_tcd_tilecomp_t* tilec; + opj_tccp_t* tccp; + OPJ_INT32* restrict datap; + OPJ_UINT32 cblk_w, cblk_h; + OPJ_INT32 x, y; + OPJ_UINT32 i, j; + opj_t1_cblk_decode_processing_job_t* job; + opj_t1_t* t1; + OPJ_UINT32 resno; + OPJ_UINT32 
tile_w; + + job = (opj_t1_cblk_decode_processing_job_t*) user_data; + resno = job->resno; + cblk = job->cblk; + band = job->band; + tilec = job->tilec; + tccp = job->tccp; + tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + + if( !*(job->pret) ) + { + opj_free(job); + return; + } + + t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); + if( t1 == NULL ) + { + t1 = opj_t1_create( OPJ_FALSE ); + opj_tls_set( tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper ); + } + + if (OPJ_FALSE == opj_t1_decode_cblk( + t1, + cblk, + band->bandno, + (OPJ_UINT32)tccp->roishift, + tccp->cblksty)) { + *(job->pret) = OPJ_FALSE; + opj_free(job); + return; + } + + x = cblk->x0 - band->x0; + y = cblk->y0 - band->y0; + if (band->bandno & 1) { + opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; + x += pres->x1 - pres->x0; + } + if (band->bandno & 2) { + opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; + y += pres->y1 - pres->y0; + } + + datap=t1->data; + cblk_w = t1->w; + cblk_h = t1->h; + + if (tccp->roishift) { + OPJ_INT32 thresh = 1 << tccp->roishift; + for (j = 0; j < cblk_h; ++j) { + for (i = 0; i < cblk_w; ++i) { + OPJ_INT32 val = datap[(j * cblk_w) + i]; + OPJ_INT32 mag = abs(val); + if (mag >= thresh) { + mag >>= tccp->roishift; + datap[(j * cblk_w) + i] = val < 0 ? 
-mag : mag; + } + } + } + } + if (tccp->qmfbid == 1) { + OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; + for (j = 0; j < cblk_h; ++j) { + i = 0; + for (; i < (cblk_w & ~3); i += 4) { + OPJ_INT32 tmp0 = datap[(j * cblk_w) + i]; + OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1]; + OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2]; + OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3]; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2; + } + for (; i < cblk_w; ++i) { + OPJ_INT32 tmp = datap[(j * cblk_w) + i]; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2; + } + } + } else { /* if (tccp->qmfbid == 0) */ + OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; + for (j = 0; j < cblk_h; ++j) { + OPJ_FLOAT32* restrict tiledp2 = tiledp; + for (i = 0; i < cblk_w; ++i) { + OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize; + *tiledp2 = tmp; + datap++; + tiledp2++; + } + tiledp += tile_w; + } + } + + opj_free(job); +} + + +void opj_t1_decode_cblks( opj_thread_pool_t* tp, + volatile OPJ_BOOL* pret, + opj_tcd_tilecomp_t* tilec, + opj_tccp_t* tccp + ) { OPJ_UINT32 resno, bandno, precno, cblkno; - OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) { opj_tcd_resolution_t* res = &tilec->resolutions[resno]; @@ -1559,85 +1686,24 @@ OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1, for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) { opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno]; - OPJ_INT32* restrict datap; - OPJ_UINT32 cblk_w, cblk_h; - OPJ_INT32 x, y; - OPJ_UINT32 i, j; + opj_t1_cblk_decode_processing_job_t* job; - if (OPJ_FALSE == opj_t1_decode_cblk( - t1, - cblk, - band->bandno, - (OPJ_UINT32)tccp->roishift, - tccp->cblksty)) { - return OPJ_FALSE; - } - - x = cblk->x0 
- band->x0; - y = cblk->y0 - band->y0; - if (band->bandno & 1) { - opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; - x += pres->x1 - pres->x0; - } - if (band->bandno & 2) { - opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; - y += pres->y1 - pres->y0; - } - - datap=t1->data; - cblk_w = t1->w; - cblk_h = t1->h; - - if (tccp->roishift) { - OPJ_INT32 thresh = 1 << tccp->roishift; - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - OPJ_INT32 val = datap[(j * cblk_w) + i]; - OPJ_INT32 mag = abs(val); - if (mag >= thresh) { - mag >>= tccp->roishift; - datap[(j * cblk_w) + i] = val < 0 ? -mag : mag; - } - } - } - } - if (tccp->qmfbid == 1) { - OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; - for (j = 0; j < cblk_h; ++j) { - i = 0; - for (; i < (cblk_w & ~3); i += 4) { - OPJ_INT32 tmp0 = datap[(j * cblk_w) + i]; - OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1]; - OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2]; - OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2; - } - for (; i < cblk_w; ++i) { - OPJ_INT32 tmp = datap[(j * cblk_w) + i]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2; - } - } - } else { /* if (tccp->qmfbid == 0) */ - OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; - for (j = 0; j < cblk_h; ++j) { - OPJ_FLOAT32* restrict tiledp2 = tiledp; - for (i = 0; i < cblk_w; ++i) { - OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize; - *tiledp2 = tmp; - datap++; - tiledp2++; - } - tiledp += tile_w; - } - } + job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, sizeof(opj_t1_cblk_decode_processing_job_t)); + job->resno = resno; + job->cblk = cblk; + job->band = band; + job->tilec = tilec; + job->tccp = tccp; + job->pret = pret; + 
opj_thread_pool_submit_job( tp, opj_t1_clbl_decode_processor, job ); + if( !(*pret) ) + return; } /* cblkno */ } /* precno */ } /* bandno */ } /* resno */ - return OPJ_TRUE; + + return; } diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 22557d96..5afc6490 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -172,7 +172,8 @@ Decode the code-blocks of a tile @param tilec The tile to decode @param tccp Tile coding parameters */ -OPJ_BOOL opj_t1_decode_cblks( opj_t1_t* t1, +void opj_t1_decode_cblks( opj_thread_pool_t* tp, + volatile OPJ_BOOL* pret, opj_tcd_tilecomp_t* tilec, opj_tccp_t* tccp); diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index d76a3f9d..a34fa18c 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -1568,30 +1568,22 @@ static OPJ_BOOL opj_tcd_t2_decode (opj_tcd_t *p_tcd, static OPJ_BOOL opj_tcd_t1_decode ( opj_tcd_t *p_tcd ) { OPJ_UINT32 compno; - opj_t1_t * l_t1; opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; opj_tcd_tilecomp_t* l_tile_comp = l_tile->comps; opj_tccp_t * l_tccp = p_tcd->tcp->tccps; - - - l_t1 = opj_t1_create(OPJ_FALSE); - if (l_t1 == 00) { - return OPJ_FALSE; - } + volatile OPJ_BOOL ret = OPJ_TRUE; for (compno = 0; compno < l_tile->numcomps; ++compno) { - /* The +3 is headroom required by the vectorized DWT */ - if (OPJ_FALSE == opj_t1_decode_cblks(l_t1, l_tile_comp, l_tccp)) { - opj_t1_destroy(l_t1); - return OPJ_FALSE; - } + opj_t1_decode_cblks(p_tcd->thread_pool, &ret, l_tile_comp, l_tccp); + if( !ret ) + break; ++l_tile_comp; ++l_tccp; } - opj_t1_destroy(l_t1); + opj_thread_pool_wait_completion(p_tcd->thread_pool, 0); - return OPJ_TRUE; + return ret; } diff --git a/src/lib/openjp2/tls_keys.h b/src/lib/openjp2/tls_keys.h new file mode 100644 index 00000000..fb26498d --- /dev/null +++ b/src/lib/openjp2/tls_keys.h @@ -0,0 +1,37 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. 
This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2016, Even Rouault + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef TLS_KEYS_H +#define TLS_KEYS_H + +#define OPJ_TLS_KEY_T1 0 + +#endif From 57b216bb587aa7eba13afbbfd6a1fe5f04201b61 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 25 May 2016 18:07:15 +0200 Subject: [PATCH 16/33] Use thread pool for DWT decoding --- src/lib/openjp2/dwt.c | 172 +++++++++++++++++++++++++++++++++++++----- src/lib/openjp2/dwt.h | 3 +- src/lib/openjp2/tcd.c | 2 +- 3 files changed, 157 insertions(+), 20 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index a4ff01ba..e21cc16f 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -124,7 +124,7 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_st /** Inverse wavelet transform in 2-D. */ -static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn); +static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn); static OPJ_BOOL opj_dwt_encode_procedure( opj_tcd_tilecomp_t * tilec, void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) ); @@ -473,8 +473,8 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) /* */ /* Inverse 5-3 wavelet transform in 2-D. 
*/ /* */ -OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { - return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1); +OPJ_BOOL opj_dwt_decode(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { + return opj_dwt_decode_tile(tp, tilec, numres, &opj_dwt_decode_1); } @@ -556,10 +556,72 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_U return mr ; } +typedef struct +{ + opj_dwt_t h; + DWT1DFN dwt_1D; + OPJ_UINT32 rw; + OPJ_UINT32 w; + OPJ_INT32 * restrict tiledp; + int min_j; + int max_j; +} opj_dwd_decode_h_job_t; + +static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls) +{ + int j; + opj_dwd_decode_h_job_t* job; + (void)tls; + + job = (opj_dwd_decode_h_job_t*)user_data; + for( j = job->min_j; j < job->max_j; j++ ) + { + opj_dwt_interleave_h(&job->h, &job->tiledp[j*job->w]); + (job->dwt_1D)(&job->h); + memcpy(&job->tiledp[j*job->w], job->h.mem, job->rw * sizeof(OPJ_INT32)); + } + + opj_aligned_free(job->h.mem); + opj_free(job); +} + +typedef struct +{ + opj_dwt_t v; + DWT1DFN dwt_1D; + OPJ_UINT32 rh; + OPJ_UINT32 w; + OPJ_INT32 * restrict tiledp; + int min_j; + int max_j; +} opj_dwd_decode_v_job_t; + +static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) +{ + int j; + opj_dwd_decode_v_job_t* job; + (void)tls; + + job = (opj_dwd_decode_v_job_t*)user_data; + for( j = job->min_j; j < job->max_j; j++ ) + { + OPJ_UINT32 k; + opj_dwt_interleave_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w); + (job->dwt_1D)(&job->v); + for(k = 0; k < job->rh; ++k) { + job->tiledp[k * job->w + j] = job->v.mem[k]; + } + } + + opj_aligned_free(job->v.mem); + opj_free(job); +} + + /* */ /* Inverse wavelet transform in 2-D. 
*/ /* */ -static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { +static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { opj_dwt_t h; opj_dwt_t v; @@ -569,11 +631,15 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + size_t h_mem_size; + int num_threads; if (numres == 1U) { return OPJ_TRUE; } - h.mem = (OPJ_INT32*)opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32)); + num_threads = opj_thread_pool_get_thread_count(tp); + h_mem_size = opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32); + h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); if (! h.mem){ /* FIXME event manager error callback */ return OPJ_FALSE; @@ -595,23 +661,93 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); h.cas = tr->x0 % 2; - for(j = 0; j < rh; ++j) { - opj_dwt_interleave_h(&h, &tiledp[j*w]); - (dwt_1D)(&h); - memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); - } + if( num_threads <= 1 || rh == 1 ) + { + for(j = 0; j < rh; ++j) { + opj_dwt_interleave_h(&h, &tiledp[j*w]); + (dwt_1D)(&h); + memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); + } + } + else + { + int num_jobs = num_threads; + if( rh < num_jobs ) + num_jobs = rh; + for( j = 0; j < num_jobs; j++ ) + { + opj_dwd_decode_h_job_t* job; + + job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); + job->h = h; + job->dwt_1D = dwt_1D; + job->rw = rw; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * (rh / num_jobs); + job->max_j = (j+1) * (rh / num_jobs); + if( job->max_j > rh || j == num_jobs - 1 ) + job->max_j = rh; + job->h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); + if (!job->h.mem) + { + /* FIXME event 
manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(h.mem); + return OPJ_FALSE; + } + opj_thread_pool_submit_job( tp, opj_dwt_decode_h_func, job ); + } + opj_thread_pool_wait_completion(tp, 0); + } v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); v.cas = tr->y0 % 2; - for(j = 0; j < rw; ++j){ - OPJ_UINT32 k; - opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); - (dwt_1D)(&v); - for(k = 0; k < rh; ++k) { - tiledp[k * w + j] = v.mem[k]; - } - } + if( num_threads <= 1 || rw == 1 ) + { + for(j = 0; j < rw; ++j){ + OPJ_UINT32 k; + opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); + (dwt_1D)(&v); + for(k = 0; k < rh; ++k) { + tiledp[k * w + j] = v.mem[k]; + } + } + } + else + { + int num_jobs = num_threads; + if( rw < num_jobs ) + num_jobs = rw; + for( j = 0; j < num_jobs; j++ ) + { + opj_dwd_decode_v_job_t* job; + + job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); + job->v = v; + job->dwt_1D = dwt_1D; + job->rh = rh; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * (rw / num_jobs); + job->max_j = (j+1) * (rw / num_jobs); + if( job->max_j > rw || j == num_jobs - 1 ) + job->max_j = rw; + job->v.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); + if (!job->v.mem) + { + /* FIXME event manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(v.mem); + return OPJ_FALSE; + } + opj_thread_pool_submit_job( tp, opj_dwt_decode_v_func, job ); + } + opj_thread_pool_wait_completion(tp, 0); + } } opj_aligned_free(h.mem); return OPJ_TRUE; diff --git a/src/lib/openjp2/dwt.h b/src/lib/openjp2/dwt.h index 21fe942a..93850026 100644 --- a/src/lib/openjp2/dwt.h +++ b/src/lib/openjp2/dwt.h @@ -63,10 +63,11 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec); /** Inverse 5-3 wavelet transform in 2-D. Apply a reversible inverse DWT transform to a component of an image. 
+@param tp Thread pool @param tilec Tile component information (current tile) @param numres Number of resolution levels to decode */ -OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres); +OPJ_BOOL opj_dwt_decode(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres); /** Get the gain of a subband for the reversible 5-3 DWT. diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index a34fa18c..2980f723 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -1610,7 +1610,7 @@ static OPJ_BOOL opj_tcd_dwt_decode ( opj_tcd_t *p_tcd ) */ if (l_tccp->qmfbid == 1) { - if (! opj_dwt_decode(l_tile_comp, l_img_comp->resno_decoded+1)) { + if (! opj_dwt_decode(p_tcd->thread_pool, l_tile_comp, l_img_comp->resno_decoded+1)) { return OPJ_FALSE; } } From e3eb0a206d66cc873eccb57fc12a0497de400aca Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 25 May 2016 18:39:41 +0200 Subject: [PATCH 17/33] .travis.yml: add a conf with OPJ_NUM_THREADS=2 --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 0618f26e..3d73f75e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,9 @@ matrix: - os: linux compiler: gcc env: OPJ_CI_ARCH=x86_64 OPJ_CI_BUILD_CONFIGURATION=Release OPJ_CI_INCLUDE_IF_DEPLOY=1 + - os: linux + compiler: gcc + env: OPJ_CI_ARCH=x86_64 OPJ_CI_BUILD_CONFIGURATION=Release OPJ_NUM_THREADS=2 - os: linux compiler: gcc env: OPJ_CI_ARCH=i386 OPJ_CI_BUILD_CONFIGURATION=Release From d67cd2220a291b54718f731052be4a9397f67077 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 25 May 2016 19:08:04 +0200 Subject: [PATCH 18/33] opj_decompress: add a -threads option --- src/bin/jp2/opj_decompress.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/bin/jp2/opj_decompress.c b/src/bin/jp2/opj_decompress.c index ab7ff04a..0e02c56b 100644 --- a/src/bin/jp2/opj_decompress.c +++ b/src/bin/jp2/opj_decompress.c @@ -150,6 +150,8 @@ typedef 
struct opj_decompress_params int upsample; /* split output components to different files */ int split_pnm; + /** number of threads */ + int num_threads; }opj_decompress_parameters; /* -------------------------------------------------------------------------- */ @@ -224,8 +226,11 @@ static void decode_help_display(void) { " -upsample\n" " Downsampled components will be upsampled to image size\n" " -split-pnm\n" - " Split output components to different files when writing to PNM\n" - "\n"); + " Split output components to different files when writing to PNM\n"); + if( opj_has_thread_support() ) { + fprintf(stdout," -threads \n" + " Number of threads to use for decoding.\n"); + } /* UniPG>> */ #ifdef USE_JPWL fprintf(stdout," -W \n" @@ -520,7 +525,8 @@ int parse_cmdline_decoder(int argc, char **argv, opj_decompress_parameters *para {"OutFor", REQ_ARG, NULL,'O'}, {"force-rgb", NO_ARG, NULL, 1}, {"upsample", NO_ARG, NULL, 1}, - {"split-pnm", NO_ARG, NULL, 1} + {"split-pnm", NO_ARG, NULL, 1}, + {"threads", REQ_ARG, NULL, 'T'} }; const char optlist[] = "i:o:r:l:x:d:t:p:" @@ -808,6 +814,22 @@ int parse_cmdline_decoder(int argc, char **argv, opj_decompress_parameters *para break; #endif /* USE_JPWL */ /* <num_threads = opj_get_num_cpus(); + if( parameters->num_threads == 1 ) + parameters->num_threads = 0; + } + else + { + sscanf(opj_optarg, "%d", &parameters->num_threads); + } + } + break; /* ----------------------------------------------------- */ @@ -1306,7 +1328,13 @@ int main(int argc, char **argv) opj_destroy_codec(l_codec); failed = 1; goto fin; } - + + if( parameters.num_threads >= 1 && !opj_codec_set_threads(l_codec, parameters.num_threads) ) { + fprintf(stderr, "ERROR -> opj_decompress: failed to set number of threads\n"); + opj_stream_destroy(l_stream); + opj_destroy_codec(l_codec); + failed = 1; goto fin; + } /* Read the main header of the codestream and if necessary the JP2 boxes*/ if(! 
opj_read_header(l_stream, l_codec, &image)){ From 69497d35c0e35a1f9b789d016e9eb4946b8f0fab Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 25 May 2016 21:39:21 +0200 Subject: [PATCH 19/33] opj_decompress: use clock_gettime() instead of getrusage() so as to get the time spent, and not to the total CPU time --- src/bin/jp2/CMakeLists.txt | 3 +++ src/bin/jp2/opj_decompress.c | 24 +++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/bin/jp2/CMakeLists.txt b/src/bin/jp2/CMakeLists.txt index dc013c21..ad7bce71 100644 --- a/src/bin/jp2/CMakeLists.txt +++ b/src/bin/jp2/CMakeLists.txt @@ -57,6 +57,9 @@ foreach(exe opj_decompress opj_compress opj_dump) # On unix you need to link to the math library: if(UNIX) target_link_libraries(${exe} m) + IF("${CMAKE_SYSTEM_NAME}" MATCHES "Linux") + target_link_libraries(${exe} rt) + endif() endif() # Install exe install(TARGETS ${exe} diff --git a/src/bin/jp2/opj_decompress.c b/src/bin/jp2/opj_decompress.c index 0e02c56b..57fe554b 100644 --- a/src/bin/jp2/opj_decompress.c +++ b/src/bin/jp2/opj_decompress.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef _WIN32 #include "windirent.h" @@ -907,17 +908,22 @@ OPJ_FLOAT64 opj_clock(void) { /* t is the high resolution performance counter (see MSDN) */ QueryPerformanceCounter ( & t ) ; return freq.QuadPart ? (t.QuadPart / (OPJ_FLOAT64)freq.QuadPart) : 0; +#elif defined(__linux) + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return( ts.tv_sec + ts.tv_nsec * 1e-9 ); #else - /* Unix or Linux: use resource usage */ - struct rusage t; - OPJ_FLOAT64 procTime; - /* (1) Get the rusage data structure at this moment (man getrusage) */ - getrusage(0,&t); - /* (2) What is the elapsed time ? 
- CPU time = User time + System time */ + /* Unix : use resource usage */ + /* FIXME: this counts the total CPU time, instead of the user perceived time */ + struct rusage t; + OPJ_FLOAT64 procTime; + /* (1) Get the rusage data structure at this moment (man getrusage) */ + getrusage(0,&t); + /* (2) What is the elapsed time ? - CPU time = User time + System time */ /* (2a) Get the seconds */ - procTime = (OPJ_FLOAT64)(t.ru_utime.tv_sec + t.ru_stime.tv_sec); - /* (2b) More precisely! Get the microseconds part ! */ - return ( procTime + (OPJ_FLOAT64)(t.ru_utime.tv_usec + t.ru_stime.tv_usec) * 1e-6 ) ; + procTime = (OPJ_FLOAT64)(t.ru_utime.tv_sec + t.ru_stime.tv_sec); + /* (2b) More precisely! Get the microseconds part ! */ + return ( procTime + (OPJ_FLOAT64)(t.ru_utime.tv_usec + t.ru_stime.tv_usec) * 1e-6 ) ; #endif } From 7d3c7a345f05adbc9ca26d8ca7f6c7fffa5096be Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 26 May 2016 23:51:32 +0200 Subject: [PATCH 20/33] Be robust to failed allocations of job structures --- src/lib/openjp2/dwt.c | 20 ++++++++++++++++++++ src/lib/openjp2/t1.c | 5 +++++ 2 files changed, 25 insertions(+) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index e21cc16f..18f8d9c3 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -679,6 +679,16 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t opj_dwd_decode_h_job_t* job; job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); + if( !job ) + { + /* It would be nice to fallback to single thread case, but */ + /* unfortunately some jobs may be launched and have modified */ + /* tiledp, so it is not practical to recover from that error */ + /* FIXME event manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(h.mem); + return OPJ_FALSE; + } job->h = h; job->dwt_1D = dwt_1D; job->rw = rw; @@ -726,6 +736,16 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t 
opj_dwd_decode_v_job_t* job; job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); + if( !job ) + { + /* It would be nice to fallback to single thread case, but */ + /* unfortunately some jobs may be launched and have modified */ + /* tiledp, so it is not practical to recover from that error */ + /* FIXME event manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(v.mem); + return OPJ_FALSE; + } job->v = v; job->dwt_1D = dwt_1D; job->rh = rh; diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index adf64bb4..b1c6128d 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1689,6 +1689,11 @@ void opj_t1_decode_cblks( opj_thread_pool_t* tp, opj_t1_cblk_decode_processing_job_t* job; job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, sizeof(opj_t1_cblk_decode_processing_job_t)); + if( !job ) + { + *pret = OPJ_FALSE; + return; + } job->resno = resno; job->cblk = cblk; job->band = band; From d2d35bf6c2119cd39a298f57b03bf9e17d36946b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BChlstrasser?= Date: Mon, 25 Jul 2016 20:46:11 +0200 Subject: [PATCH 21/33] Create separate type for bit-fields. The definition of bit-fields with type OPJ_UINT32 caused compilation errors on IBM iSeries, because OPJ_UINT32 is defined as uint32_t, and uint32_t is defined as unsigned long in <stdint.h>. The definition of bit-fields with an integer type of a specific size doesn't make sense anyway. 
--- src/lib/openjp2/j2k.h | 30 +++++++++++++++--------------- src/lib/openjp2/openjpeg.h | 2 ++ src/lib/openjp2/tcd.h | 4 ++-- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/lib/openjp2/j2k.h b/src/lib/openjp2/j2k.h index 358e0739..cc725b79 100644 --- a/src/lib/openjp2/j2k.h +++ b/src/lib/openjp2/j2k.h @@ -228,7 +228,7 @@ typedef struct opj_simple_mcc_decorrelation_data OPJ_UINT32 m_nb_comps; opj_mct_data_t * m_decorrelation_array; opj_mct_data_t * m_offset_array; - OPJ_UINT32 m_is_irreversible : 1; + OPJ_BITFIELD m_is_irreversible : 1; } opj_simple_mcc_decorrelation_data_t; @@ -306,11 +306,11 @@ typedef struct opj_tcp /***** FLAGS *******/ /** If cod == 1 --> there was a COD marker for the present tile */ - OPJ_UINT32 cod : 1; + OPJ_BITFIELD cod : 1; /** If ppt == 1 --> there was a PPT marker for the present tile */ - OPJ_UINT32 ppt : 1; + OPJ_BITFIELD ppt : 1; /** indicates if a POC marker has been used O:NO, 1:YES */ - OPJ_UINT32 POC : 1; + OPJ_BITFIELD POC : 1; } opj_tcp_t; @@ -327,13 +327,13 @@ typedef struct opj_encoding_param /** Flag determining tile part generation*/ OPJ_BYTE m_tp_flag; /** allocation by rate/distortion */ - OPJ_UINT32 m_disto_alloc : 1; + OPJ_BITFIELD m_disto_alloc : 1; /** allocation by fixed layer */ - OPJ_UINT32 m_fixed_alloc : 1; + OPJ_BITFIELD m_fixed_alloc : 1; /** add fixed_quality */ - OPJ_UINT32 m_fixed_quality : 1; + OPJ_BITFIELD m_fixed_quality : 1; /** Enabling Tile part generation*/ - OPJ_UINT32 m_tp_on : 1; + OPJ_BITFIELD m_tp_on : 1; } opj_encoding_param_t; @@ -453,9 +453,9 @@ typedef struct opj_cp /******** FLAGS *********/ /** if ppm == 1 --> there was a PPM marker*/ - OPJ_UINT32 ppm : 1; + OPJ_BITFIELD ppm : 1; /** tells if the parameter is a coding or decoding one */ - OPJ_UINT32 m_is_decoder : 1; + OPJ_BITFIELD m_is_decoder : 1; /* < typedef size_t OPJ_SIZE_T; diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index 07f8379a..3c1940b8 100644 --- a/src/lib/openjp2/tcd.h +++ 
b/src/lib/openjp2/tcd.h @@ -70,7 +70,7 @@ typedef struct opj_tcd_pass { OPJ_UINT32 rate; OPJ_FLOAT64 distortiondec; OPJ_UINT32 len; - OPJ_UINT32 term : 1; + OPJ_BITFIELD term : 1; } opj_tcd_pass_t; /** @@ -219,7 +219,7 @@ typedef struct opj_tcd /** current encoded/decoded tile */ OPJ_UINT32 tcd_tileno; /** tell if the tcd is a decoder. */ - OPJ_UINT32 m_is_decoder : 1; + OPJ_BITFIELD m_is_decoder : 1; } opj_tcd_t; /** @name Exported functions */ From 54874194a88997c2492fe1206203047739266dff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BChlstrasser?= Date: Wed, 27 Jul 2016 10:09:54 +0200 Subject: [PATCH 22/33] Unified bit-field declaration, removed tabs. --- src/lib/openjp2/j2k.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/openjp2/j2k.h b/src/lib/openjp2/j2k.h index cc725b79..d6cb958b 100644 --- a/src/lib/openjp2/j2k.h +++ b/src/lib/openjp2/j2k.h @@ -497,9 +497,9 @@ typedef struct opj_j2k_dec */ OPJ_BOOL m_last_tile_part; /** to tell that a tile can be decoded. 
*/ - OPJ_BITFIELD m_can_decode : 1; - OPJ_BITFIELD m_discard_tiles : 1; - OPJ_BITFIELD m_skip_data : 1; + OPJ_BITFIELD m_can_decode : 1; + OPJ_BITFIELD m_discard_tiles : 1; + OPJ_BITFIELD m_skip_data : 1; /** TNsot correction : see issue 254 **/ OPJ_BITFIELD m_nb_tile_parts_correction_checked : 1; OPJ_BITFIELD m_nb_tile_parts_correction : 1; From 4f9abb9a45ffd711f9717db15d062fa020ed6cf5 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 11 Aug 2016 21:50:46 +0200 Subject: [PATCH 23/33] [Win32] Use _beginthreadex instead of CreateThread() --- src/lib/openjp2/thread.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/openjp2/thread.c b/src/lib/openjp2/thread.c index b2f8b5b2..59b5d87e 100644 --- a/src/lib/openjp2/thread.c +++ b/src/lib/openjp2/thread.c @@ -44,6 +44,7 @@ #endif #include +#include OPJ_BOOL OPJ_CALLCONV opj_has_thread_support(void) { @@ -224,11 +225,11 @@ struct opj_thread_t HANDLE hThread; }; -static DWORD WINAPI opj_thread_callback_adapter( void *info ) +unsigned int __stdcall opj_thread_callback_adapter( void *info ) { opj_thread_t* thread = (opj_thread_t*) info; HANDLE hEvent = NULL; - + thread->thread_fn( thread->user_data ); /* Free the handle possible allocated by a cond */ @@ -258,7 +259,6 @@ static DWORD WINAPI opj_thread_callback_adapter( void *info ) opj_thread_t* opj_thread_create( opj_thread_fn thread_fn, void* user_data ) { opj_thread_t* thread; - DWORD nThreadId = 0; assert( thread_fn ); @@ -268,8 +268,8 @@ opj_thread_t* opj_thread_create( opj_thread_fn thread_fn, void* user_data ) thread->thread_fn = thread_fn; thread->user_data = user_data; - thread->hThread = CreateThread( NULL, 0, opj_thread_callback_adapter, thread, - 0, &nThreadId ); + thread->hThread = (HANDLE)_beginthreadex(NULL, 0, + opj_thread_callback_adapter, thread, 0, NULL); if( thread->hThread == NULL ) { From 8750e183c652de44d332e786842ca33182c375be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BChlstrasser?= Date: Wed, 7 
Sep 2016 08:41:01 +0200 Subject: [PATCH 24/33] Moved type OPJ_BITFIELD from openjpeg.h to opj_includes.h OPJ_BITFIELD is used only in internal headers and must not appear in the public openjpeg.h header. --- src/lib/openjp2/openjpeg.h | 2 -- src/lib/openjp2/opj_includes.h | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib/openjp2/openjpeg.h b/src/lib/openjp2/openjpeg.h index d000a3a6..c07e9c84 100644 --- a/src/lib/openjp2/openjpeg.h +++ b/src/lib/openjp2/openjpeg.h @@ -129,8 +129,6 @@ typedef uint64_t OPJ_UINT64; typedef int64_t OPJ_OFF_T; /* 64-bit file offset type */ -typedef unsigned int OPJ_BITFIELD; - #include typedef size_t OPJ_SIZE_T; diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index 60b7316d..54d203a5 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -183,6 +183,9 @@ static INLINE long opj_lrintf(float f) { # pragma intrinsic(__emul) #endif +/* Type to use for bit-fields in internal headers */ +typedef unsigned int OPJ_BITFIELD; + #include "opj_inttypes.h" #include "opj_clock.h" #include "opj_malloc.h" From ab22c5bad55fccdc440847c896baaf4bf89365a0 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Thu, 8 Sep 2016 09:43:36 +0200 Subject: [PATCH 25/33] opj_thread_pool: fix potential deadlock at thread pool destruction --- src/lib/openjp2/thread.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib/openjp2/thread.c b/src/lib/openjp2/thread.c index 59b5d87e..fce563d0 100644 --- a/src/lib/openjp2/thread.c +++ b/src/lib/openjp2/thread.c @@ -930,7 +930,9 @@ void opj_thread_pool_destroy(opj_thread_pool_t* tp) int i; opj_thread_pool_wait_completion(tp, 0); + opj_mutex_lock(tp->mutex); tp->state = OPJWTS_STOP; + opj_mutex_unlock(tp->mutex); for(i=0;iworker_threads_count;i++) { From 893143c8e13e491d0e884eb757580ec9575bbc8f Mon Sep 17 00:00:00 2001 From: trylab Date: Tue, 13 Sep 2016 17:43:30 +0800 Subject: [PATCH 26/33] Fix issue 833. 
Add some overflow check operations. --- src/bin/jp2/convertbmp.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/bin/jp2/convertbmp.c b/src/bin/jp2/convertbmp.c index d264823f..ae83077c 100644 --- a/src/bin/jp2/convertbmp.c +++ b/src/bin/jp2/convertbmp.c @@ -675,10 +675,28 @@ opj_image_t* bmptoimage(const char *filename, opj_cparameters_t *parameters) } } + if (Info_h.biWidth == 0 || Info_h.biHeight == 0) { + fclose(IN); + return NULL; + } + + if (Info_h.biBitCount > (((OPJ_UINT32)-1) - 31) / Info_h.biWidth) { + fclose(IN); + return NULL; + } stride = ((Info_h.biWidth * Info_h.biBitCount + 31U) / 32U) * 4U; /* rows are aligned on 32bits */ if (Info_h.biBitCount == 4 && Info_h.biCompression == 2) { /* RLE 4 gets decoded as 8 bits data for now... */ + if (8 > (((OPJ_UINT32)-1) - 31) / Info_h.biWidth) { + fclose(IN); + return NULL; + } stride = ((Info_h.biWidth * 8U + 31U) / 32U) * 4U; } + + if (stride > ((OPJ_UINT32)-1) / sizeof(OPJ_UINT8) / Info_h.biHeight) { + fclose(IN); + return NULL; + } pData = (OPJ_UINT8 *) calloc(1, stride * Info_h.biHeight * sizeof(OPJ_UINT8)); if (pData == NULL) { fclose(IN); From f1f7c533089130266a6c1a502b30a846bb506b56 Mon Sep 17 00:00:00 2001 From: Antonin Descampe Date: Tue, 13 Sep 2016 16:21:38 +0200 Subject: [PATCH 27/33] remove ops 1.4 and 1.2 from abi-check to avoid symlinks in gh-pages --- tools/abi-tracker/openjpeg.json | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/tools/abi-tracker/openjpeg.json b/tools/abi-tracker/openjpeg.json index f8e0faa8..62709718 100644 --- a/tools/abi-tracker/openjpeg.json +++ b/tools/abi-tracker/openjpeg.json @@ -104,29 +104,5 @@ "ABIDiff": "Off", "PublicSymbols": "public_symbols/openjpeg/1.5/list", "PublicTypes": "public_types/openjpeg/1.5/list" - }, - { - "Number": "1.4", - "Installed": "installed/openjpeg/1.4", - "Source": "src/openjpeg/1.4/version.1.4.tar.gz", - "Changelog": "CHANGES", - "HeadersDiff": "On", - "PkgDiff": "Off", - "ABIView": 
"Off", - "ABIDiff": "Off", - "PublicSymbols": "public_symbols/openjpeg/1.4/list", - "PublicTypes": "public_types/openjpeg/1.4/list" - }, - { - "Number": "1.2", - "Installed": "installed/openjpeg/1.2", - "Source": "src/openjpeg/1.2/version.1.2.tar.gz", - "Changelog": "ChangeLog", - "HeadersDiff": "On", - "PkgDiff": "Off", - "ABIView": "Off", - "ABIDiff": "Off", - "PublicSymbols": "public_symbols/openjpeg/1.2/list", - "PublicTypes": "public_types/openjpeg/1.2/list" }] } From 0954bc11e3ab6a39d86e5ed51286da4b8989743d Mon Sep 17 00:00:00 2001 From: Matthieu Darbois Date: Wed, 14 Sep 2016 00:12:43 +0200 Subject: [PATCH 28/33] Fix some warnings (#838) Fix warnings introduced by uclouvain/openjpeg#786 --- src/bin/jp2/opj_decompress.c | 2 +- src/lib/openjp2/dwt.c | 27 +++--- src/lib/openjp2/j2k.c | 7 +- src/lib/openjp2/openjpeg.c | 12 +-- src/lib/openjp2/opj_includes.h | 1 + src/lib/openjp2/t1.c | 147 +++++++++++++++------------------ src/lib/openjp2/thread.c | 23 ++++-- 7 files changed, 110 insertions(+), 109 deletions(-) diff --git a/src/bin/jp2/opj_decompress.c b/src/bin/jp2/opj_decompress.c index 57fe554b..83160c3d 100644 --- a/src/bin/jp2/opj_decompress.c +++ b/src/bin/jp2/opj_decompress.c @@ -911,7 +911,7 @@ OPJ_FLOAT64 opj_clock(void) { #elif defined(__linux) struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); - return( ts.tv_sec + ts.tv_nsec * 1e-9 ); + return( (OPJ_FLOAT64)ts.tv_sec + (OPJ_FLOAT64)ts.tv_nsec * 1e-9 ); #else /* Unix : use resource usage */ /* FIXME: this counts the total CPU time, instead of the user perceived time */ diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 2e28effc..2d793bb6 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -563,13 +563,13 @@ typedef struct OPJ_UINT32 rw; OPJ_UINT32 w; OPJ_INT32 * OPJ_RESTRICT tiledp; - int min_j; - int max_j; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; } opj_dwd_decode_h_job_t; static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls) { - int j; + OPJ_UINT32 j; 
opj_dwd_decode_h_job_t* job; (void)tls; @@ -592,13 +592,13 @@ typedef struct OPJ_UINT32 rh; OPJ_UINT32 w; OPJ_INT32 * OPJ_RESTRICT tiledp; - int min_j; - int max_j; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; } opj_dwd_decode_v_job_t; static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) { - int j; + OPJ_UINT32 j; opj_dwd_decode_v_job_t* job; (void)tls; @@ -631,8 +631,8 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); - size_t h_mem_size; - int num_threads; + size_t h_mem_size; + int num_threads; if (numres == 1U) { return OPJ_TRUE; @@ -671,9 +671,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t } else { - int num_jobs = num_threads; - if( rh < num_jobs ) + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + if( rh < num_jobs ) { num_jobs = rh; + } for( j = 0; j < num_jobs; j++ ) { opj_dwd_decode_h_job_t* job; @@ -695,7 +696,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t job->w = w; job->tiledp = tiledp; job->min_j = j * (rh / num_jobs); - job->max_j = (j+1) * (rh / num_jobs); + job->max_j = (j+1) * (rh / num_jobs); /* TODO this can overflow */ if( job->max_j > rh || j == num_jobs - 1 ) job->max_j = rh; job->h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); @@ -728,7 +729,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t } else { - int num_jobs = num_threads; + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; if( rw < num_jobs ) num_jobs = rw; for( j = 0; j < num_jobs; j++ ) @@ -752,7 +753,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t job->w = w; job->tiledp = tiledp; job->min_j = j * (rw / num_jobs); - job->max_j = (j+1) * (rw / num_jobs); + job->max_j = (j+1) * (rw / num_jobs); /* TODO this can overflow */ if( job->max_j > rw || j 
== num_jobs - 1 ) job->max_j = rw; job->v.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index bdacbe91..66802bb9 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -5953,8 +5953,11 @@ OPJ_BOOL opj_j2k_set_threads(opj_j2k_t *j2k, OPJ_UINT32 num_threads) if( opj_has_thread_support() ) { opj_thread_pool_destroy(j2k->m_tp); - j2k->m_tp = opj_thread_pool_create((int)num_threads); - if( j2k->m_tp == 0 ) + j2k->m_tp = NULL; + if (num_threads <= (OPJ_UINT32)INT_MAX ) { + j2k->m_tp = opj_thread_pool_create((int)num_threads); + } + if( j2k->m_tp == NULL ) { j2k->m_tp = opj_thread_pool_create(0); return OPJ_FALSE; diff --git a/src/lib/openjp2/openjpeg.c b/src/lib/openjp2/openjpeg.c index ee3e14b6..4d12540e 100644 --- a/src/lib/openjp2/openjpeg.c +++ b/src/lib/openjp2/openjpeg.c @@ -361,15 +361,15 @@ void OPJ_CALLCONV opj_set_default_decoder_parameters(opj_dparameters_t *paramete } -OPJ_API OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec, +OPJ_BOOL OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec, int num_threads) { - if (p_codec ) { - opj_codec_private_t * l_codec = (opj_codec_private_t *) p_codec; + if (p_codec && (num_threads >= 0)) { + opj_codec_private_t * l_codec = (opj_codec_private_t *) p_codec; - return l_codec->opj_set_threads(l_codec->m_codec, num_threads); - } - return OPJ_FALSE; + return l_codec->opj_set_threads(l_codec->m_codec, (OPJ_UINT32)num_threads); + } + return OPJ_FALSE; } OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec, diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h index 3576877c..e3de42da 100644 --- a/src/lib/openjp2/opj_includes.h +++ b/src/lib/openjp2/opj_includes.h @@ -54,6 +54,7 @@ #include #include #include +#include /* Use fseeko() and ftello() if they are available since they use diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 66884e3b..1b722c9e 100644 --- a/src/lib/openjp2/t1.c +++ 
b/src/lib/openjp2/t1.c @@ -86,15 +86,15 @@ static INLINE void opj_t1_dec_sigpass_step_raw( OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, - OPJ_INT32 row); + OPJ_UINT32 row); static INLINE void opj_t1_dec_sigpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row, - OPJ_INT32 flags_stride); + OPJ_UINT32 row, + OPJ_UINT32 flags_stride); static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -102,7 +102,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, - OPJ_INT32 row); + OPJ_UINT32 row); /** @@ -154,8 +154,7 @@ Decode refinement pass */ static void opj_t1_dec_refpass_raw( opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 cblksty); + OPJ_INT32 bpno); static void opj_t1_dec_refpass_mqc_vsc( opj_t1_t *t1, OPJ_INT32 bpno); @@ -176,12 +175,11 @@ static void opj_t1_dec_refpass_step(opj_t1_t *t1, static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, - opj_flag_t *flagsp, opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 row); + OPJ_UINT32 row); static INLINE void opj_t1_dec_refpass_step_mqc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -189,7 +187,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc( OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 row); + OPJ_UINT32 row); static INLINE void opj_t1_dec_refpass_step_mqc_vsc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -198,7 +196,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( OPJ_INT32 poshalf, OPJ_INT32 neghalf, OPJ_INT32 vsc, - OPJ_INT32 row); + OPJ_UINT32 row); @@ -223,14 +221,14 @@ static void opj_t1_dec_clnpass_step_partial( opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row); + OPJ_UINT32 row); static void opj_t1_dec_clnpass_step( opj_t1_t *t1, opj_flag_t *flagsp, opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row); + 
OPJ_UINT32 row); static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, @@ -239,7 +237,7 @@ static void opj_t1_dec_clnpass_step_vsc( OPJ_INT32 oneplushalf, OPJ_INT32 partial, OPJ_INT32 vsc, - OPJ_INT32 row); + OPJ_UINT32 row); /** Encode clean-up pass */ @@ -248,13 +246,6 @@ static void opj_t1_enc_clnpass( OPJ_INT32 bpno, OPJ_INT32 *nmsedec, OPJ_UINT32 cblksty); -/** -Decode clean-up pass -*/ -static void opj_t1_dec_clnpass( - opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 cblksty); static OPJ_FLOAT64 opj_t1_getwmsedec( OPJ_INT32 nmsedec, @@ -362,10 +353,10 @@ static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT sp[1] |= T1_SIG_NW; } -static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, opj_colflag_t *colflagsp, OPJ_UINT32 s, OPJ_UINT32 stride, OPJ_INT32 row) +static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, opj_colflag_t *colflagsp, OPJ_UINT32 s, OPJ_UINT32 stride, OPJ_UINT32 row) { opj_t1_updateflags(flagsp, s, stride); - if( row == 0 ) + if( row == 0U ) { *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); @@ -377,7 +368,7 @@ static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, opj_colflag_t *(colflagsp - stride) |= (T1_COLFLAG_SIG_OTHER_ROW_3); *(colflagsp - stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3); } - else if( row == 3 ) + else if( row == 3U ) { *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))); @@ -450,7 +441,7 @@ static INLINE void opj_t1_dec_sigpass_step_raw( OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, - OPJ_INT32 row) + OPJ_UINT32 row) { OPJ_INT32 v, flag; opj_raw_t *raw = t1->raw; /* RAW component */ @@ -475,8 +466,8 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( opj_colflag_t* colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row, - OPJ_INT32 flags_stride) + OPJ_UINT32 row, + OPJ_UINT32 
flags_stride) { OPJ_INT32 v, flag; @@ -510,7 +501,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 vsc, - OPJ_INT32 row) + OPJ_UINT32 row) { OPJ_INT32 v, flag; @@ -611,16 +602,16 @@ static void opj_t1_dec_sigpass_raw( opj_colflag_t *colflags2 = colflags1 + i; \ if( *colflags2 == 0 ) continue; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0U, flags_stride); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3, flags_stride); \ + opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \ data2 += w; \ } \ data1 += w << 2; \ @@ -666,22 +657,22 @@ static void opj_t1_dec_sigpass_mqc_vsc( one = 1 << bpno; half = one >> 1; oneplushalf = one | half; - for (k = 0; k < (t1->h & ~3); k += 4) { + for (k = 0; k < (t1->h & ~(OPJ_UINT32)3); k += 4U) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; opj_flag_t *flags2 = flags1 + i; opj_colflag_t *colflags2 = colflags1 + i; flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 0); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 0U); data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 1); + 
opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 1U); data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 2); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 2U); data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 1, 3); + opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 1, 3U); data2 += t1->w; } data1 += t1->w << 2; @@ -736,12 +727,11 @@ static void opj_t1_enc_refpass_step( opj_t1_t *t1, static INLINE void opj_t1_dec_refpass_step_raw( opj_t1_t *t1, - opj_flag_t *flagsp, opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 row) + OPJ_UINT32 row) { OPJ_INT32 v, t; @@ -767,7 +757,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc( OPJ_INT32 *datap, OPJ_INT32 poshalf, OPJ_INT32 neghalf, - OPJ_INT32 row) + OPJ_UINT32 row) { OPJ_INT32 v, t; @@ -775,6 +765,8 @@ static INLINE void opj_t1_dec_refpass_step_mqc( #ifdef CONSISTENCY_CHECK assert( ((*flagsp & (T1_SIG | T1_VISIT)) == T1_SIG) == ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) ); +#else + (void)flagsp_unused; #endif if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { @@ -796,9 +788,9 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( OPJ_INT32 poshalf, OPJ_INT32 neghalf, OPJ_INT32 vsc, - OPJ_INT32 row) + OPJ_UINT32 row) { - OPJ_INT32 v, t, flag; + OPJ_INT32 v, t; opj_mqc_t *mqc = t1->mqc; /* MQC component */ @@ -848,12 +840,10 @@ static void opj_t1_enc_refpass( static void opj_t1_dec_refpass_raw( opj_t1_t *t1, - OPJ_INT32 bpno, - OPJ_INT32 cblksty) + OPJ_INT32 bpno) { OPJ_INT32 one, poshalf, neghalf; OPJ_UINT32 i, j, k; - OPJ_INT32 vsc; 
opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1]; one = 1 << bpno; poshalf = one >> 1; @@ -864,7 +854,6 @@ static void opj_t1_dec_refpass_raw( for (j = k; j < k + 4 && j < t1->h; ++j) { opj_t1_dec_refpass_step_raw( t1, - &t1->flags[((j+1) * t1->flags_stride) + i + 1], colflags2, &t1->data[(j * t1->w) + i], poshalf, @@ -892,16 +881,16 @@ static void opj_t1_dec_refpass_raw( opj_colflag_t *colflags2 = colflags1 + i; \ if( *colflags2 == 0 ) continue; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 0); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 0U); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 1); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 1U); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 2); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 2U); \ data2 += w; \ flags2 += flags_stride; \ - opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 3); \ + opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 3U); \ data2 += w; \ } \ data1 += w << 2; \ @@ -948,22 +937,22 @@ static void opj_t1_dec_refpass_mqc_vsc( one = 1 << bpno; poshalf = one >> 1; neghalf = bpno > 0 ? 
-poshalf : -1; - for (k = 0; k < (t1->h & ~3); k += 4) { + for (k = 0; k < (t1->h & ~(OPJ_UINT32)3U); k += 4U) { for (i = 0; i < t1->w; ++i) { OPJ_INT32 *data2 = data1 + i; opj_flag_t *flags2 = flags1 + i; opj_colflag_t *colflags2 = colflags1 + i; flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 0); + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 0U); data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 1); + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 1U); data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 2); + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 2U); data2 += t1->w; flags2 += t1->flags_stride; - opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 1, 3); + opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 1, 3U); data2 += t1->w; } data1 += t1->w << 2; @@ -1028,7 +1017,7 @@ static void opj_t1_dec_clnpass_step_partial( opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row) + OPJ_UINT32 row) { OPJ_INT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ @@ -1049,7 +1038,7 @@ static void opj_t1_dec_clnpass_step( opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row) + OPJ_UINT32 row) { OPJ_INT32 v, flag; @@ -1078,8 +1067,8 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, - OPJ_INT32 row, - OPJ_INT32 flags_stride) + OPJ_UINT32 row, + OPJ_UINT32 flags_stride) { OPJ_INT32 v; OPJ_INT32 flag; @@ -1094,7 +1083,7 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( opj_mqc_setcurctx(mqc, 
opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, v, flags_stride, row); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, flags_stride, row); } } /*flagsp &= ~T1_VISIT;*/ @@ -1103,12 +1092,12 @@ static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit( static void opj_t1_dec_clnpass_step_vsc( opj_t1_t *t1, opj_flag_t *flagsp, - opj_colflag_t *colflagsp, + opj_colflag_t *colflagsp, OPJ_INT32 *datap, OPJ_INT32 oneplushalf, OPJ_INT32 partial, OPJ_INT32 vsc, - OPJ_INT32 row) + OPJ_UINT32 row) { OPJ_INT32 v, flag; @@ -1125,7 +1114,7 @@ LABEL_PARTIAL: opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); + opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); } } #ifdef CONSISTENCY_CHECK @@ -1202,7 +1191,7 @@ static void opj_t1_enc_clnpass( #define opj_t1_dec_clnpass_internal(consistency_check, t1, bpno, cblksty, w, h, flags_stride) \ { \ OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \ - OPJ_UINT32 i, j, k; \ + OPJ_UINT32 i, j, k; \ OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \ \ opj_mqc_t *mqc = t1->mqc; /* MQC component */ \ @@ -1294,28 +1283,28 @@ static void opj_t1_enc_clnpass( flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0, flags_stride); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0U, flags_stride); \ } \ if( consistency_check ) *flags2 &= 
~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1, flags_stride); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \ } \ if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2, flags_stride); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \ } \ if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\ - opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3, flags_stride); \ + opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \ } \ if( consistency_check ) *flags2 &= ~T1_VISIT; \ data2 += w; \ @@ -1634,19 +1623,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; for (j = 0; j < cblk_h; ++j) { i = 0; - for (; i < (cblk_w & ~3); i += 4) { - OPJ_INT32 tmp0 = datap[(j * cblk_w) + i]; - OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1]; - 
OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2]; - OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2; + for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { + OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U]; + OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; + OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; + OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 0U] = tmp0 / 2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 1U] = tmp1 / 2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 2U] = tmp2 / 2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 3U] = tmp3 / 2; } for (; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2; + ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp / 2; } } } else { /* if (tccp->qmfbid == 0) */ @@ -1778,7 +1767,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, break; case 1: if (type == T1_TYPE_RAW) { - opj_t1_dec_refpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); + opj_t1_dec_refpass_raw(t1, bpno_plus_one); } else { if (cblksty & J2K_CCP_CBLKSTY_VSC) { opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one); @@ -1821,7 +1810,7 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, break; case 1: if (type == T1_TYPE_RAW) { - opj_t1_dec_refpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty); + opj_t1_dec_refpass_raw(t1, bpno_plus_one); } else { if (cblksty & J2K_CCP_CBLKSTY_VSC) { opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one); diff --git a/src/lib/openjp2/thread.c b/src/lib/openjp2/thread.c index fce563d0..79a9d5fb 100644 --- a/src/lib/openjp2/thread.c +++ b/src/lib/openjp2/thread.c @@ -314,11 +314,13 @@ struct opj_mutex_t opj_mutex_t* opj_mutex_create(void) { - opj_mutex_t* mutex = (opj_mutex_t*) opj_malloc(sizeof(opj_mutex_t)); - if( !mutex ) - return NULL; - pthread_mutex_t pthr_mutex = 
PTHREAD_MUTEX_INITIALIZER; - mutex->mutex = pthr_mutex; + opj_mutex_t* mutex = (opj_mutex_t*) opj_calloc(1U, sizeof(opj_mutex_t)); + if( mutex != NULL ) { + if ( pthread_mutex_init(&mutex->mutex, NULL) != 0) { + opj_free(mutex); + mutex = NULL; + } + } return mutex; } @@ -537,19 +539,24 @@ OPJ_BOOL opj_tls_set(opj_tls_t* tls, int key, void* value, opj_tls_free_func opj { opj_tls_key_val_t* new_key_val; int i; + + if (tls->key_val_count == INT_MAX) { + return OPJ_FALSE; + } for(i=0;ikey_val_count;i++) { if( tls->key_val[i].key == key ) { - if( tls->key_val[i].opj_free_func ) + if( tls->key_val[i].opj_free_func ) { tls->key_val[i].opj_free_func(tls->key_val[i].value); + } tls->key_val[i].value = value; tls->key_val[i].opj_free_func = opj_free_func; return OPJ_TRUE; } } new_key_val = (opj_tls_key_val_t*) opj_realloc( tls->key_val, - (tls->key_val_count + 1) * sizeof(opj_tls_key_val_t) ); + ((size_t)tls->key_val_count + 1U) * sizeof(opj_tls_key_val_t) ); if( !new_key_val ) return OPJ_FALSE; tls->key_val = new_key_val; @@ -691,7 +698,7 @@ static OPJ_BOOL opj_thread_pool_setup(opj_thread_pool_t* tp, int num_threads) if( tp->cond == NULL ) return OPJ_FALSE; - tp->worker_threads = (opj_worker_thread_t*) opj_calloc( num_threads, + tp->worker_threads = (opj_worker_thread_t*) opj_calloc( (size_t)num_threads, sizeof(opj_worker_thread_t) ); if( tp->worker_threads == NULL ) return OPJ_FALSE; From f88c9974e27161427c003dda5d89f61a5a3995e9 Mon Sep 17 00:00:00 2001 From: Matthieu Darbois Date: Wed, 14 Sep 2016 23:46:46 +0200 Subject: [PATCH 29/33] Flags in T1 shall be unsigned (#840) This will remove some conversion warnings --- src/lib/openjp2/t1.c | 117 +++++++++++++++-------------- src/lib/openjp2/t1.h | 76 +++++++++---------- src/lib/openjp2/t1_generate_luts.c | 56 +++++++------- src/lib/openjp2/t1_luts.h | 8 +- 4 files changed, 129 insertions(+), 128 deletions(-) diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 1b722c9e..53451677 100644 --- a/src/lib/openjp2/t1.c 
+++ b/src/lib/openjp2/t1.c @@ -338,18 +338,18 @@ static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s, OPJ_UINT /* and T1_SIG_E == T1_SIG_N << 1, T1_SIG_W == T1_SIG_N << 2 and T1_SIG_S == T1_SIG_N << 2 */ /* and T1_SGN_E == T1_SGN_N << 1, T1_SGN_W == T1_SGN_N << 2 and T1_SGN_S == T1_SGN_N << 2 */ - opj_flag_t flag_N = T1_SIG_N | (T1_SIG_N << (4 * s)); + unsigned int flag_N = T1_SIG_N | ((unsigned int)T1_SIG_N << (4U * s)); np[-1] |= T1_SIG_SE; - np[0] |= flag_N << 2; + np[0] |= (opj_flag_t)(flag_N << 2U); np[1] |= T1_SIG_SW; - flagsp[-1] |= flag_N << 1; + flagsp[-1] |= (opj_flag_t)(flag_N << 1U); flagsp[0] |= T1_SIG; - flagsp[1] |= flag_N << 3; + flagsp[1] |= (opj_flag_t)(flag_N << 3U); sp[-1] |= T1_SIG_NE; - sp[0] |= flag_N; + sp[0] |= (opj_flag_t)flag_N; sp[1] |= T1_SIG_NW; } @@ -358,39 +358,39 @@ static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp, opj_colflag_t opj_t1_updateflags(flagsp, s, stride); if( row == 0U ) { - *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); - *(colflagsp - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); - *(colflagsp + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1U)))); + *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1U)))); + *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1U)))); *(colflagsp - stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3); *(colflagsp - stride) |= (T1_COLFLAG_SIG_OTHER_ROW_3); *(colflagsp - stride + 1) |= 
(T1_COLFLAG_SIG_OTHER_ROW_3); } else if( row == 3U ) { - *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))); - *(colflagsp - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))); - *(colflagsp + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS* (row-1))); + *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1U)))); + *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1U)))); + *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS* (row-1)))); *(colflagsp + stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0); *(colflagsp + stride) |= (T1_COLFLAG_SIG_OTHER_ROW_0); *(colflagsp + stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0); } else { - *(colflagsp - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); - *colflagsp |= (T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); - *(colflagsp + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1))) | - (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1))); + *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1U))) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1U)))); + *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << 
(T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1U))) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1U)))); + *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row)) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row-1U))) | + (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row+1U)))); } } @@ -443,20 +443,20 @@ static INLINE void opj_t1_dec_sigpass_step_raw( OPJ_INT32 vsc, OPJ_UINT32 row) { - OPJ_INT32 v, flag; + OPJ_UINT32 v, flag; opj_raw_t *raw = t1->raw; /* RAW component */ flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); if ((flag & T1_SIG_OTH) && !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) { if (opj_raw_decode(raw)) { - v = (OPJ_INT32)opj_raw_decode(raw); /* ESSAI */ + v = opj_raw_decode(raw); /* ESSAI */ *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); + opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); } #ifdef CONSISTENCY_CHECK *flagsp |= T1_VISIT; #endif - *colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); + *colflagsp |= (opj_colflag_t)(T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); } } @@ -490,7 +490,7 @@ static INLINE void opj_t1_dec_sigpass_step_mqc( #ifdef CONSISTENCY_CHECK *flagsp |= T1_VISIT; #endif - *colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); + *colflagsp |= (opj_colflag_t)(T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ @@ -503,23 +503,23 @@ static INLINE void opj_t1_dec_sigpass_step_mqc_vsc( OPJ_INT32 vsc, OPJ_UINT32 row) { - OPJ_INT32 v, flag; + OPJ_UINT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ flag = vsc ? 
((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); if ((flag & T1_SIG_OTH) && !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); if (opj_mqc_decode(mqc)) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag)); + v = (OPJ_UINT32)opj_mqc_decode(mqc) ^ opj_t1_getspb(flag); *datap = v ? -oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); + opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); } #ifdef CONSISTENCY_CHECK *flagsp |= T1_VISIT; #endif - *colflagsp |= (T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); + *colflagsp |= (opj_colflag_t)(T1_COLFLAG_VISIT_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ @@ -742,7 +742,7 @@ static INLINE void opj_t1_dec_refpass_step_raw( v = (OPJ_INT32)opj_raw_decode(raw); t = v ? poshalf : neghalf; *datap += *datap < 0 ? -t : t; - *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); + *colflagsp |= (opj_colflag_t)(T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ @@ -776,7 +776,7 @@ static INLINE void opj_t1_dec_refpass_step_mqc( v = opj_mqc_decode(mqc); t = v ? poshalf : neghalf; *datap += *datap < 0 ? 
-t : t; - *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); + *colflagsp |= (opj_colflag_t)(T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ @@ -790,20 +790,21 @@ static INLINE void opj_t1_dec_refpass_step_mqc_vsc( OPJ_INT32 vsc, OPJ_UINT32 row) { - OPJ_INT32 v, t; - + OPJ_UINT32 v; + OPJ_INT32 t; + opj_mqc_t *mqc = t1->mqc; /* MQC component */ if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row))) == ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) { - OPJ_INT32 flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); + OPJ_UINT32 flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp); OPJ_UINT32 tmp1 = (flag & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG; - OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2 : tmp1; + OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2U : tmp1; opj_mqc_setcurctx(mqc, tmp2); /* ESSAI */ - v = opj_mqc_decode(mqc); + v = (OPJ_UINT32)opj_mqc_decode(mqc); t = v ? poshalf : neghalf; *datap += *datap < 0 ? 
-t : t; - *colflagsp |= (T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); + *colflagsp |= (opj_colflag_t)(T1_COLFLAG_REFINE_ROW_0 << (T1_COLFLAG_RBS * row)); } } /* VSC and BYPASS by Antonin */ @@ -1008,7 +1009,7 @@ LABEL_PARTIAL: opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride); } } - *flagsp &= ~T1_VISIT; + *flagsp &= (opj_flag_t)~T1_VISIT; } static void opj_t1_dec_clnpass_step_partial( @@ -1099,7 +1100,7 @@ static void opj_t1_dec_clnpass_step_vsc( OPJ_INT32 vsc, OPJ_UINT32 row) { - OPJ_INT32 v, flag; + OPJ_UINT32 v, flag; opj_mqc_t *mqc = t1->mqc; /* MQC component */ @@ -1108,13 +1109,13 @@ static void opj_t1_dec_clnpass_step_vsc( goto LABEL_PARTIAL; } if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (T1_COLFLAG_RBS * row)))) { - opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag)); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag)); if (opj_mqc_decode(mqc)) { LABEL_PARTIAL: - opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag)); - v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag); + opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag)); + v = (OPJ_UINT32)opj_mqc_decode(mqc) ^ opj_t1_getspb(flag); *datap = v ? 
-oneplushalf : oneplushalf; - opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride, row); + opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row); } } #ifdef CONSISTENCY_CHECK @@ -1236,7 +1237,7 @@ static void opj_t1_enc_clnpass( agg && (j == k + (OPJ_UINT32)runlen), \ vsc, j - k); \ } \ - *colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ + *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ } \ colflags1 += flags_stride; \ } \ @@ -1285,31 +1286,31 @@ static void opj_t1_enc_clnpass( if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0U, flags_stride); \ } \ - if( consistency_check ) *flags2 &= ~T1_VISIT; \ + if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \ } \ - if( consistency_check ) *flags2 &= ~T1_VISIT; \ + if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \ } \ - if( consistency_check ) *flags2 &= ~T1_VISIT; \ + if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ data2 += w; \ flags2 += flags_stride; \ if( 
consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \ if (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\ opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \ } \ - if( consistency_check ) *flags2 &= ~T1_VISIT; \ + if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \ data2 += w; \ } \ - *colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ + *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ } \ data1 += w << 2; \ flags1 += flags_stride << 2; \ @@ -1324,7 +1325,7 @@ static void opj_t1_enc_clnpass( opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j - k); \ data2 += w; \ } \ - *colflags2 &= ~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ + *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \ } \ } \ \ diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 5afc6490..c3fb7bed 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -53,40 +53,40 @@ in T1.C are used by some function in TCD.C. /* CAUTION: the value of those constants must not be changed, otherwise the */ /* optimization of opj_t1_updateflags() will break! 
*/ /* BEGINNING of flags that apply to opj_flag_t */ -#define T1_SIG_NE 0x0001 /**< Context orientation : North-East direction */ -#define T1_SIG_SE 0x0002 /**< Context orientation : South-East direction */ -#define T1_SIG_SW 0x0004 /**< Context orientation : South-West direction */ -#define T1_SIG_NW 0x0008 /**< Context orientation : North-West direction */ -#define T1_SIG_N 0x0010 /**< Context orientation : North direction */ -#define T1_SIG_E 0x0020 /**< Context orientation : East direction */ -#define T1_SIG_S 0x0040 /**< Context orientation : South direction */ -#define T1_SIG_W 0x0080 /**< Context orientation : West direction */ +#define T1_SIG_NE 0x0001U /**< Context orientation : North-East direction */ +#define T1_SIG_SE 0x0002U /**< Context orientation : South-East direction */ +#define T1_SIG_SW 0x0004U /**< Context orientation : South-West direction */ +#define T1_SIG_NW 0x0008U /**< Context orientation : North-West direction */ +#define T1_SIG_N 0x0010U /**< Context orientation : North direction */ +#define T1_SIG_E 0x0020U /**< Context orientation : East direction */ +#define T1_SIG_S 0x0040U /**< Context orientation : South direction */ +#define T1_SIG_W 0x0080U /**< Context orientation : West direction */ #define T1_SIG_OTH (T1_SIG_N|T1_SIG_NE|T1_SIG_E|T1_SIG_SE|T1_SIG_S|T1_SIG_SW|T1_SIG_W|T1_SIG_NW) #define T1_SIG_PRIM (T1_SIG_N|T1_SIG_E|T1_SIG_S|T1_SIG_W) -#define T1_SGN_N 0x0100 -#define T1_SGN_E 0x0200 -#define T1_SGN_S 0x0400 -#define T1_SGN_W 0x0800 +#define T1_SGN_N 0x0100U +#define T1_SGN_E 0x0200U +#define T1_SGN_S 0x0400U +#define T1_SGN_W 0x0800U #define T1_SGN (T1_SGN_N|T1_SGN_E|T1_SGN_S|T1_SGN_W) -#define T1_SIG 0x1000 /**< No longer used by decoder */ -#define T1_REFINE 0x2000 /**< No longer used by decoder */ -#define T1_VISIT 0x4000 /**< No longer used by decoder */ +#define T1_SIG 0x1000U /**< No longer used by decoder */ +#define T1_REFINE 0x2000U /**< No longer used by decoder */ +#define T1_VISIT 0x4000U /**< No longer used by 
decoder */ /* END of flags that apply to opj_flag_t */ -#define T1_NUMCTXS_ZC 9 -#define T1_NUMCTXS_SC 5 +#define T1_NUMCTXS_ZC 9 +#define T1_NUMCTXS_SC 5 #define T1_NUMCTXS_MAG 3 #define T1_NUMCTXS_AGG 1 #define T1_NUMCTXS_UNI 1 -#define T1_CTXNO_ZC 0 -#define T1_CTXNO_SC (T1_CTXNO_ZC+T1_NUMCTXS_ZC) +#define T1_CTXNO_ZC 0 +#define T1_CTXNO_SC (T1_CTXNO_ZC+T1_NUMCTXS_ZC) #define T1_CTXNO_MAG (T1_CTXNO_SC+T1_NUMCTXS_SC) #define T1_CTXNO_AGG (T1_CTXNO_MAG+T1_NUMCTXS_MAG) #define T1_CTXNO_UNI (T1_CTXNO_AGG+T1_NUMCTXS_AGG) -#define T1_NUMCTXS (T1_CTXNO_UNI+T1_NUMCTXS_UNI) +#define T1_NUMCTXS (T1_CTXNO_UNI+T1_NUMCTXS_UNI) #define T1_NMSEDEC_FRACBITS (T1_NMSEDEC_BITS-1) @@ -94,27 +94,27 @@ in T1.C are used by some function in TCD.C. #define T1_TYPE_RAW 1 /**< No encoding the information is store under raw format in codestream (mode switch RAW)*/ /* Those flags are used by opj_colflag_t */ -#define T1_COLFLAG_RBS 4 /* RBS = Row Bit Shift */ -#define T1_COLFLAG_SIG_OTHER_ROW_0 (1 << 0) /**< This sample has at least one significant neighbour */ -#define T1_COLFLAG_SIG_ROW_0 (1 << 1) /**< This sample is significant */ -#define T1_COLFLAG_VISIT_ROW_0 (1 << 2) /**< This sample has been visited */ -#define T1_COLFLAG_REFINE_ROW_0 (1 << 3) /**< This sample has been refined */ -#define T1_COLFLAG_SIG_OTHER_ROW_1 (T1_COLFLAG_SIG_OTHER_ROW_0 << T1_COLFLAG_RBS) -#define T1_COLFLAG_SIG_ROW_1 (T1_COLFLAG_SIG_ROW_0 << T1_COLFLAG_RBS) -#define T1_COLFLAG_VISIT_ROW_1 (T1_COLFLAG_VISIT_ROW_0 << T1_COLFLAG_RBS) -#define T1_COLFLAG_REFINE_ROW_1 (T1_COLFLAG_REFINE_ROW_0 << T1_COLFLAG_RBS) -#define T1_COLFLAG_SIG_OTHER_ROW_2 (T1_COLFLAG_SIG_OTHER_ROW_0 << (2*T1_COLFLAG_RBS)) -#define T1_COLFLAG_SIG_ROW_2 (T1_COLFLAG_SIG_ROW_0 << (2*T1_COLFLAG_RBS)) -#define T1_COLFLAG_VISIT_ROW_2 (T1_COLFLAG_VISIT_ROW_0 << (2*T1_COLFLAG_RBS)) -#define T1_COLFLAG_REFINE_ROW_2 (T1_COLFLAG_REFINE_ROW_0 << (2*T1_COLFLAG_RBS)) -#define T1_COLFLAG_SIG_OTHER_ROW_3 (T1_COLFLAG_SIG_OTHER_ROW_0 << (3*T1_COLFLAG_RBS)) 
-#define T1_COLFLAG_SIG_ROW_3 (T1_COLFLAG_SIG_ROW_0 << (3*T1_COLFLAG_RBS)) -#define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3*T1_COLFLAG_RBS)) -#define T1_COLFLAG_REFINE_ROW_3 (T1_COLFLAG_REFINE_ROW_0 << (3*T1_COLFLAG_RBS)) +#define T1_COLFLAG_RBS 4U /* RBS = Row Bit Shift */ +#define T1_COLFLAG_SIG_OTHER_ROW_0 (1U << 0U) /**< This sample has at least one significant neighbour */ +#define T1_COLFLAG_SIG_ROW_0 (1U << 1U) /**< This sample is significant */ +#define T1_COLFLAG_VISIT_ROW_0 (1U << 2U) /**< This sample has been visited */ +#define T1_COLFLAG_REFINE_ROW_0 (1U << 3U) /**< This sample has been refined */ +#define T1_COLFLAG_SIG_OTHER_ROW_1 (T1_COLFLAG_SIG_OTHER_ROW_0 << (1U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_ROW_1 (T1_COLFLAG_SIG_ROW_0 << (1U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_VISIT_ROW_1 (T1_COLFLAG_VISIT_ROW_0 << (1U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_REFINE_ROW_1 (T1_COLFLAG_REFINE_ROW_0 << (1U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_OTHER_ROW_2 (T1_COLFLAG_SIG_OTHER_ROW_0 << (2U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_ROW_2 (T1_COLFLAG_SIG_ROW_0 << (2U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_VISIT_ROW_2 (T1_COLFLAG_VISIT_ROW_0 << (2U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_REFINE_ROW_2 (T1_COLFLAG_REFINE_ROW_0 << (2U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_OTHER_ROW_3 (T1_COLFLAG_SIG_OTHER_ROW_0 << (3U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_SIG_ROW_3 (T1_COLFLAG_SIG_ROW_0 << (3U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3U * T1_COLFLAG_RBS)) +#define T1_COLFLAG_REFINE_ROW_3 (T1_COLFLAG_REFINE_ROW_0 << (3U * T1_COLFLAG_RBS)) /* ----------------------------------------------------------------------- */ -typedef OPJ_INT16 opj_flag_t; +typedef OPJ_UINT16 opj_flag_t; /** Flags for 4 consecutive rows of a column */ typedef OPJ_UINT16 opj_colflag_t; diff --git a/src/lib/openjp2/t1_generate_luts.c b/src/lib/openjp2/t1_generate_luts.c index bc01c994..1a843089 100644 --- 
a/src/lib/openjp2/t1_generate_luts.c +++ b/src/lib/openjp2/t1_generate_luts.c @@ -38,7 +38,7 @@ #include "opj_includes.h" -static int t1_init_ctxno_zc(int f, int orient) { +static int t1_init_ctxno_zc(unsigned int f, unsigned int orient) { int h, v, d, n, t, hv; h = ((f & T1_SIG_W) != 0) + ((f & T1_SIG_E) != 0); v = ((f & T1_SIG_N) != 0) + ((f & T1_SIG_S) != 0); @@ -113,7 +113,7 @@ static int t1_init_ctxno_zc(int f, int orient) { return (T1_CTXNO_ZC + n); } -static int t1_init_ctxno_sc(int f) { +static int t1_init_ctxno_sc(unsigned int f) { int hc, vc, n; n = 0; @@ -154,7 +154,7 @@ static int t1_init_ctxno_sc(int f) { return (T1_CTXNO_SC + n); } -static int t1_init_spb(int f) { +static int t1_init_spb(unsigned int f) { int hc, vc, n; hc = opj_int_min(((f & (T1_SIG_E | T1_SGN_E)) == @@ -192,7 +192,7 @@ static void dump_array16(int array[],int size){ int main(int argc, char **argv) { - int i, j; + unsigned int i, j; double u, v, t; int lut_ctxno_zc[1024]; @@ -205,47 +205,47 @@ int main(int argc, char **argv) printf("/* This file was automatically generated by t1_generate_luts.c */\n\n"); /* lut_ctxno_zc */ - for (j = 0; j < 4; ++j) { - for (i = 0; i < 256; ++i) { - int orient = j; - if (orient == 2) { - orient = 1; - } else if (orient == 1) { - orient = 2; + for (j = 0U; j < 4U; ++j) { + for (i = 0U; i < 256U; ++i) { + unsigned int orient = j; + if (orient == 2U) { + orient = 1U; + } else if (orient == 1U) { + orient = 2U; } lut_ctxno_zc[(orient << 8) | i] = t1_init_ctxno_zc(i, j); } } printf("static const OPJ_BYTE lut_ctxno_zc[1024] = {\n "); - for (i = 0; i < 1023; ++i) { + for (i = 0U; i < 1023U; ++i) { printf("%i, ", lut_ctxno_zc[i]); - if(!((i+1)&0x1f)) + if(!((i+1U)&0x1fU)) printf("\n "); } printf("%i\n};\n\n", lut_ctxno_zc[1023]); /* lut_ctxno_sc */ printf("static const OPJ_BYTE lut_ctxno_sc[256] = {\n "); - for (i = 0; i < 255; ++i) { + for (i = 0U; i < 255U; ++i) { printf("0x%x, ", t1_init_ctxno_sc(i << 4)); - if(!((i+1)&0xf)) + if(!((i+1U)&0xfU)) printf("\n 
"); } - printf("0x%x\n};\n\n", t1_init_ctxno_sc(255 << 4)); + printf("0x%x\n};\n\n", t1_init_ctxno_sc(255U << 4)); /* lut_spb */ printf("static const OPJ_BYTE lut_spb[256] = {\n "); - for (i = 0; i < 255; ++i) { + for (i = 0U; i < 255U; ++i) { printf("%i, ", t1_init_spb(i << 4)); - if(!((i+1)&0x1f)) + if(!((i+1U)&0x1fU)) printf("\n "); } - printf("%i\n};\n\n", t1_init_spb(255 << 4)); + printf("%i\n};\n\n", t1_init_spb(255U << 4)); /* FIXME FIXME FIXME */ /* fprintf(stdout,"nmsedec luts:\n"); */ - for (i = 0; i < (1 << T1_NMSEDEC_BITS); ++i) { + for (i = 0U; i < (1U << T1_NMSEDEC_BITS); ++i) { t = i / pow(2, T1_NMSEDEC_FRACBITS); u = t; v = t - 1.5; @@ -269,17 +269,17 @@ int main(int argc, char **argv) (int) (floor((u * u) * pow(2, T1_NMSEDEC_FRACBITS) + 0.5) / pow(2, T1_NMSEDEC_FRACBITS) * 8192.0)); } - printf("static const OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = {\n "); - dump_array16(lut_nmsedec_sig, 1 << T1_NMSEDEC_BITS); + printf("static const OPJ_INT16 lut_nmsedec_sig[1U << T1_NMSEDEC_BITS] = {\n "); + dump_array16(lut_nmsedec_sig, 1U << T1_NMSEDEC_BITS); - printf("static const OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = {\n "); - dump_array16(lut_nmsedec_sig0, 1 << T1_NMSEDEC_BITS); + printf("static const OPJ_INT16 lut_nmsedec_sig0[1U << T1_NMSEDEC_BITS] = {\n "); + dump_array16(lut_nmsedec_sig0, 1U << T1_NMSEDEC_BITS); - printf("static const OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = {\n "); - dump_array16(lut_nmsedec_ref, 1 << T1_NMSEDEC_BITS); + printf("static const OPJ_INT16 lut_nmsedec_ref[1U << T1_NMSEDEC_BITS] = {\n "); + dump_array16(lut_nmsedec_ref, 1U << T1_NMSEDEC_BITS); - printf("static const OPJ_INT16 lut_nmsedec_ref0[1 << T1_NMSEDEC_BITS] = {\n "); - dump_array16(lut_nmsedec_ref0, 1 << T1_NMSEDEC_BITS); + printf("static const OPJ_INT16 lut_nmsedec_ref0[1U << T1_NMSEDEC_BITS] = {\n "); + dump_array16(lut_nmsedec_ref0, 1U << T1_NMSEDEC_BITS); return 0; } diff --git a/src/lib/openjp2/t1_luts.h b/src/lib/openjp2/t1_luts.h 
index c66a8aeb..561133fb 100644 --- a/src/lib/openjp2/t1_luts.h +++ b/src/lib/openjp2/t1_luts.h @@ -65,7 +65,7 @@ static const OPJ_BYTE lut_spb[256] = { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; -static const OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_sig[1U << T1_NMSEDEC_BITS] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -84,7 +84,7 @@ static const OPJ_INT16 lut_nmsedec_sig[1 << T1_NMSEDEC_BITS] = { 0x6c00, 0x6d80, 0x6f00, 0x7080, 0x7200, 0x7380, 0x7500, 0x7680 }; -static const OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_sig0[1U << T1_NMSEDEC_BITS] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0080, 0x0080, 0x0080, 0x0080, 0x0100, 0x0100, 0x0100, 0x0180, 0x0180, 0x0200, 0x0200, 0x0280, 0x0280, 0x0300, 0x0300, 0x0380, 0x0400, 0x0400, @@ -103,7 +103,7 @@ static const OPJ_INT16 lut_nmsedec_sig0[1 << T1_NMSEDEC_BITS] = { 0x7080, 0x7280, 0x7480, 0x7600, 0x7800, 0x7a00, 0x7c00, 0x7e00 }; -static const OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_ref[1U << T1_NMSEDEC_BITS] = { 0x1800, 0x1780, 0x1700, 0x1680, 0x1600, 0x1580, 0x1500, 0x1480, 0x1400, 0x1380, 0x1300, 0x1280, 0x1200, 0x1180, 0x1100, 0x1080, 0x1000, 0x0f80, 0x0f00, 0x0e80, 0x0e00, 0x0d80, 0x0d00, 0x0c80, @@ -122,7 +122,7 @@ static const OPJ_INT16 lut_nmsedec_ref[1 << T1_NMSEDEC_BITS] = { 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780 }; -static const OPJ_INT16 lut_nmsedec_ref0[1 << T1_NMSEDEC_BITS] = { +static const OPJ_INT16 lut_nmsedec_ref0[1U << T1_NMSEDEC_BITS] = { 0x2000, 0x1f00, 0x1e00, 0x1d00, 0x1c00, 0x1b00, 0x1a80, 0x1980, 0x1880, 0x1780, 0x1700, 0x1600, 0x1500, 0x1480, 0x1380, 0x1300, 0x1200, 0x1180, 0x1080, 0x1000, 0x0f00, 0x0e80, 0x0e00, 
0x0d00, From 9a07ccb3d0f076388e4da684a3bfd4327125c721 Mon Sep 17 00:00:00 2001 From: Matthieu Darbois Date: Thu, 15 Sep 2016 01:57:53 +0200 Subject: [PATCH 30/33] Add overflow checks for opj_aligned_malloc (#841) See https://pdfium.googlesource.com/pdfium/+/b20ab6c7acb3be1393461eb650ca8fa4660c937e/third_party/libopenjpeg20/0020-opj_aligned_malloc.patch --- src/lib/openjp2/dwt.c | 36 ++++++++++-- src/lib/openjp2/pi.c | 4 +- src/lib/openjp2/t1.c | 125 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 135 insertions(+), 30 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 2d793bb6..98ead59e 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -395,7 +395,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,void OPJ_INT32 rw; /* width of the resolution level computed */ OPJ_INT32 rh; /* height of the resolution level computed */ - OPJ_UINT32 l_data_size; + size_t l_data_size; opj_tcd_resolution_t * l_cur_res = 0; opj_tcd_resolution_t * l_last_res = 0; @@ -407,8 +407,14 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,void l_cur_res = tilec->resolutions + l; l_last_res = l_cur_res - 1; - l_data_size = opj_dwt_max_resolution( tilec->resolutions,tilec->numresolutions) * (OPJ_UINT32)sizeof(OPJ_INT32); - bj = (OPJ_INT32*)opj_malloc((size_t)l_data_size); + l_data_size = opj_dwt_max_resolution( tilec->resolutions,tilec->numresolutions); + /* overflow check */ + if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + l_data_size *= sizeof(OPJ_INT32); + bj = (OPJ_INT32*)opj_malloc(l_data_size); /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */ /* in that case, so do not error out */ if (l_data_size != 0 && ! 
bj) { @@ -638,7 +644,13 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t return OPJ_TRUE; } num_threads = opj_thread_pool_get_thread_count(tp); - h_mem_size = opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32); + h_mem_size = opj_dwt_max_resolution(tr, numres); + /* overflow check */ + if (h_mem_size > (SIZE_MAX / sizeof(OPJ_INT32))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + h_mem_size *= sizeof(OPJ_INT32); h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); if (! h.mem){ /* FIXME event manager error callback */ @@ -1003,7 +1015,21 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); - h.wavelet = (opj_v4_t*) opj_aligned_malloc((opj_dwt_max_resolution(res, numres)+5) * sizeof(opj_v4_t)); + size_t l_data_size; + + l_data_size = opj_dwt_max_resolution(res, numres); + /* overflow check */ + if (l_data_size > (SIZE_MAX - 5U)) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + l_data_size += 5U; + /* overflow check */ + if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t)); if (!h.wavelet) { /* FIXME event manager error callback */ return OPJ_FALSE; diff --git a/src/lib/openjp2/pi.c b/src/lib/openjp2/pi.c index 809b33d7..41a2f046 100644 --- a/src/lib/openjp2/pi.c +++ b/src/lib/openjp2/pi.c @@ -1238,14 +1238,14 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, /* memory allocation for include */ /* prevent an integer overflow issue */ + /* 0 < l_tcp->numlayers < 65536 c.f. 
opj_j2k_read_cod in j2k.c */ l_current_pi->include = 00; if (l_step_l <= (SIZE_MAX / (l_tcp->numlayers + 1U))) { l_current_pi->include = (OPJ_INT16*) opj_calloc((size_t)(l_tcp->numlayers + 1U) * l_step_l, sizeof(OPJ_INT16)); } - if - (!l_current_pi->include) + if (!l_current_pi->include) { opj_free(l_tmp_data); opj_free(l_tmp_ptr); diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 53451677..453d2908 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1406,56 +1406,135 @@ static OPJ_BOOL opj_t1_allocate_buffers( OPJ_UINT32 w, OPJ_UINT32 h) { - OPJ_UINT32 datasize=w * h; - OPJ_UINT32 flagssize; - /* encoder uses tile buffer, so no need to allocate */ if (!t1->encoder) { - if(datasize > t1->datasize){ + size_t datasize; + +#if (SIZE_MAX / 0xFFFFFFFFU) < 0xFFFFFFFFU /* UINT32_MAX */ + /* Overflow check */ + if ((w > 0U) && ((size_t)h > (SIZE_MAX / (size_t)w))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#endif + datasize = (size_t)w * h; + + /* Overflow check */ + if (datasize > (SIZE_MAX / sizeof(OPJ_INT32))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + + if(datasize > (size_t)t1->datasize){ opj_aligned_free(t1->data); t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32)); if(!t1->data){ /* FIXME event manager error callback */ return OPJ_FALSE; } - t1->datasize=datasize; +#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ + /* TODO remove this if t1->datasize type changes to size_t */ + /* Overflow check */ + if (datasize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#endif + t1->datasize = (OPJ_UINT32)datasize; } /* memset first arg is declared to never be null by gcc */ if (t1->data != NULL) { - memset(t1->data,0,datasize * sizeof(OPJ_INT32)); + memset(t1->data, 0, datasize * sizeof(OPJ_INT32)); } } - t1->flags_stride=w+2; - flagssize=t1->flags_stride * (h+2); - if(flagssize > t1->flagssize){ - 
opj_aligned_free(t1->flags); - t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(opj_flag_t)); - if(!t1->flags){ + { + size_t flagssize; + + /* Overflow check */ + if (w > (0xFFFFFFFFU /* UINT32_MAX */ - 2U)) { /* FIXME event manager error callback */ return OPJ_FALSE; } - t1->flagssize=flagssize; - } - memset(t1->flags,0,flagssize * sizeof(opj_flag_t)); - - if (!t1->encoder) { - OPJ_UINT32 colflags_size=t1->flags_stride * ((h+3) / 4 + 2); + t1->flags_stride = w + 2U; /* can't be 0U */ - if(colflags_size > t1->colflags_size){ +#if (SIZE_MAX - 3U) < 0xFFFFFFFFU /* UINT32_MAX */ + /* Overflow check */ + if (h > (0xFFFFFFFFU /* UINT32_MAX */ - 3U)) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#endif + flagssize = (size_t)h + 3U; + + /* Overflow check */ + if (flagssize > (SIZE_MAX / (size_t)t1->flags_stride)) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + flagssize *= (size_t)t1->flags_stride; + + if(flagssize > (size_t)t1->flagssize){ + /* Overflow check */ + if (flagssize > (SIZE_MAX / sizeof(opj_flag_t))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } + opj_aligned_free(t1->flags); + t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(opj_flag_t)); + if(!t1->flags){ + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ + /* TODO remove this if t1->flagssize type changes to size_t */ + /* Overflow check */ + if (flagssize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#endif + t1->flagssize = (OPJ_UINT32)flagssize; + } + memset(t1->flags, 0, flagssize * sizeof(opj_flag_t)); + } + if (!t1->encoder) { + size_t colflags_size = ((((size_t)h + 3U) / 4U) + 2U); /* Can't overflow, h checked against UINT32_MAX - 3U */ + + /* Overflow check */ + if (colflags_size > (SIZE_MAX / (size_t)t1->flags_stride)) { + /* FIXME event manager error callback */ + return 
OPJ_FALSE; + } + colflags_size *= (size_t)t1->flags_stride; + + if(colflags_size > (size_t)t1->colflags_size){ + /* Overflow check */ + if ((size_t)colflags_size > (SIZE_MAX / sizeof(opj_colflag_t))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } opj_aligned_free(t1->colflags); t1->colflags = (opj_colflag_t*) opj_aligned_malloc(colflags_size * sizeof(opj_colflag_t)); if(!t1->colflags){ /* FIXME event manager error callback */ return OPJ_FALSE; } - t1->colflags_size=colflags_size; +#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */ + /* TODO remove this if t1->colflags_size type changes to size_t */ + /* Overflow check */ + if (colflags_size > (size_t)0xFFFFFFFFU /* UINT32_MAX */) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +#endif + t1->colflags_size = (OPJ_UINT32)colflags_size; } - memset(t1->colflags,0,colflags_size * sizeof(opj_colflag_t)); + memset(t1->colflags, 0, colflags_size * sizeof(opj_colflag_t)); } - t1->w=w; - t1->h=h; + t1->w = w; + t1->h = h; return OPJ_TRUE; } From 6e7616c83cb01d9b7a5a35e093ff9ff6741acc74 Mon Sep 17 00:00:00 2001 From: Matthieu Darbois Date: Thu, 15 Sep 2016 23:51:34 +0200 Subject: [PATCH 31/33] Remove TODO for overflow check (#842) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check was already done. It’s been simplified. Reformat to get consistent style throughout the functions. --- src/lib/openjp2/dwt.c | 224 ++++++++++++++++++++++-------------------- 1 file changed, 119 insertions(+), 105 deletions(-) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 98ead59e..9e2a3615 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -625,9 +625,10 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) /* */ -/* Inverse wavelet transform in 2-D. */ +/* Inverse wavelet transform in 2-D. 
*/ /* */ -static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { +static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) +{ opj_dwt_t h; opj_dwt_t v; @@ -673,120 +674,133 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* t h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); h.cas = tr->x0 % 2; - if( num_threads <= 1 || rh == 1 ) - { - for(j = 0; j < rh; ++j) { - opj_dwt_interleave_h(&h, &tiledp[j*w]); - (dwt_1D)(&h); - memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); - } - } - else - { - OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; - if( rh < num_jobs ) { - num_jobs = rh; - } - for( j = 0; j < num_jobs; j++ ) - { - opj_dwd_decode_h_job_t* job; + if( num_threads <= 1 || rh <= 1 ) + { + for(j = 0; j < rh; ++j) { + opj_dwt_interleave_h(&h, &tiledp[j*w]); + (dwt_1D)(&h); + memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); + } + } + else + { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; - job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); - if( !job ) - { - /* It would be nice to fallback to single thread case, but */ - /* unfortunately some jobs may be launched and have modified */ - /* tiledp, so it is not practical to recover from that error */ - /* FIXME event manager error callback */ - opj_thread_pool_wait_completion(tp, 0); - opj_aligned_free(h.mem); - return OPJ_FALSE; - } - job->h = h; - job->dwt_1D = dwt_1D; - job->rw = rw; - job->w = w; - job->tiledp = tiledp; - job->min_j = j * (rh / num_jobs); - job->max_j = (j+1) * (rh / num_jobs); /* TODO this can overflow */ - if( job->max_j > rh || j == num_jobs - 1 ) - job->max_j = rh; - job->h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); - if (!job->h.mem) - { - /* FIXME event manager error callback */ - opj_thread_pool_wait_completion(tp, 0); - opj_free(job); - opj_aligned_free(h.mem); - return 
OPJ_FALSE; - } - opj_thread_pool_submit_job( tp, opj_dwt_decode_h_func, job ); - } - opj_thread_pool_wait_completion(tp, 0); - } + if( rh < num_jobs ) { + num_jobs = rh; + } + step_j = (rh / num_jobs); + + for(j = 0; j < num_jobs; j++) + { + opj_dwd_decode_h_job_t* job; + + job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); + if( !job ) + { + /* It would be nice to fallback to single thread case, but */ + /* unfortunately some jobs may be launched and have modified */ + /* tiledp, so it is not practical to recover from that error */ + /* FIXME event manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(h.mem); + return OPJ_FALSE; + } + job->h = h; + job->dwt_1D = dwt_1D; + job->rw = rw; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * step_j; + job->max_j = (j + 1U) * step_j; /* this can overflow */ + if( j == (num_jobs - 1U) ) { /* this will take care of the overflow */ + job->max_j = rh; + } + job->h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); + if (!job->h.mem) + { + /* FIXME event manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(h.mem); + return OPJ_FALSE; + } + opj_thread_pool_submit_job( tp, opj_dwt_decode_h_func, job ); + } + opj_thread_pool_wait_completion(tp, 0); + } v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); v.cas = tr->y0 % 2; - if( num_threads <= 1 || rw == 1 ) - { - for(j = 0; j < rw; ++j){ - OPJ_UINT32 k; - opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); - (dwt_1D)(&v); - for(k = 0; k < rh; ++k) { - tiledp[k * w + j] = v.mem[k]; - } - } - } - else - { - OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; - if( rw < num_jobs ) - num_jobs = rw; - for( j = 0; j < num_jobs; j++ ) - { - opj_dwd_decode_v_job_t* job; + if( num_threads <= 1 || rw <= 1 ) + { + for(j = 0; j < rw; ++j){ + OPJ_UINT32 k; - job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); - if( !job ) - { - /* It would be nice to fallback to single 
thread case, but */ - /* unfortunately some jobs may be launched and have modified */ - /* tiledp, so it is not practical to recover from that error */ - /* FIXME event manager error callback */ - opj_thread_pool_wait_completion(tp, 0); - opj_aligned_free(v.mem); - return OPJ_FALSE; - } - job->v = v; - job->dwt_1D = dwt_1D; - job->rh = rh; - job->w = w; - job->tiledp = tiledp; - job->min_j = j * (rw / num_jobs); - job->max_j = (j+1) * (rw / num_jobs); /* TODO this can overflow */ - if( job->max_j > rw || j == num_jobs - 1 ) - job->max_j = rw; - job->v.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size); - if (!job->v.mem) - { - /* FIXME event manager error callback */ - opj_thread_pool_wait_completion(tp, 0); - opj_free(job); - opj_aligned_free(v.mem); - return OPJ_FALSE; - } - opj_thread_pool_submit_job( tp, opj_dwt_decode_v_func, job ); - } - opj_thread_pool_wait_completion(tp, 0); - } + opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); + (dwt_1D)(&v); + for(k = 0; k < rh; ++k) { + tiledp[k * w + j] = v.mem[k]; + } + } + } + else + { + OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; + OPJ_UINT32 step_j; + + if( rw < num_jobs ) { + num_jobs = rw; + } + step_j = (rw / num_jobs); + + for( j = 0; j < num_jobs; j++ ) + { + opj_dwd_decode_v_job_t* job; + + job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); + if( !job ) + { + /* It would be nice to fallback to single thread case, but */ + /* unfortunately some jobs may be launched and have modified */ + /* tiledp, so it is not practical to recover from that error */ + /* FIXME event manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_aligned_free(v.mem); + return OPJ_FALSE; + } + job->v = v; + job->dwt_1D = dwt_1D; + job->rh = rh; + job->w = w; + job->tiledp = tiledp; + job->min_j = j * step_j; + job->max_j = (j + 1U) * step_j; /* this can overflow */ + if( j == (num_jobs - 1U) ) { /* this will take care of the overflow */ + job->max_j = rw; + } + job->v.mem = 
(OPJ_INT32*)opj_aligned_malloc(h_mem_size); + if (!job->v.mem) + { + /* FIXME event manager error callback */ + opj_thread_pool_wait_completion(tp, 0); + opj_free(job); + opj_aligned_free(v.mem); + return OPJ_FALSE; + } + opj_thread_pool_submit_job( tp, opj_dwt_decode_v_func, job ); + } + opj_thread_pool_wait_completion(tp, 0); + } } opj_aligned_free(h.mem); return OPJ_TRUE; } -static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT w, OPJ_FLOAT32* OPJ_RESTRICT a, OPJ_INT32 x, OPJ_INT32 size){ +static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT w, OPJ_FLOAT32* OPJ_RESTRICT a, OPJ_INT32 x, OPJ_INT32 size) +{ OPJ_FLOAT32* OPJ_RESTRICT bi = (OPJ_FLOAT32*) (w->wavelet + w->cas); OPJ_INT32 count = w->sn; OPJ_INT32 i, k; From 3aaeea7ce8c2065d29483817b7fd07226601851c Mon Sep 17 00:00:00 2001 From: Antonin Descampe Date: Fri, 16 Sep 2016 17:54:12 +0200 Subject: [PATCH 32/33] renamed USE_THREAD to OPJ_USE_THREAD renaming to be consistent with previous OPJ CMake options --- src/lib/openjp2/CMakeLists.txt | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt index f45ceb34..014be603 100644 --- a/src/lib/openjp2/CMakeLists.txt +++ b/src/lib/openjp2/CMakeLists.txt @@ -127,7 +127,7 @@ endif() # Experimental option; let's how cppcheck performs # Implementation details: -# I could not figure out how to easily upload a file to CDash. Instead simply +# I could not figure out how to easily upload a file to CDash. Instead simply # pretend cppcheck is part of the Build step. 
Technically cppcheck can even # output gcc formatted error/warning report # Another implementation detail: I could not redirect error to the error @@ -156,30 +156,30 @@ endif() ################################################################################# set(CMAKE_THREAD_PREFER_PTHREAD TRUE) -option(USE_THREAD "Build with thread/mutex support " ON) -if(NOT USE_THREAD) +option(OPJ_USE_THREAD "Build with thread/mutex support " ON) +if(NOT OPJ_USE_THREAD) add_definitions( -DMUTEX_stub) -endif(NOT USE_THREAD) +endif(NOT OPJ_USE_THREAD) find_package(Threads QUIET) -if(USE_THREAD AND WIN32 AND NOT Threads_FOUND ) +if(OPJ_USE_THREAD AND WIN32 AND NOT Threads_FOUND ) add_definitions( -DMUTEX_win32) set(Threads_FOUND YES) endif() -if(USE_THREAD AND Threads_FOUND AND CMAKE_USE_WIN32_THREADS_INIT ) +if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_WIN32_THREADS_INIT ) add_definitions( -DMUTEX_win32) -endif(USE_THREAD AND Threads_FOUND AND CMAKE_USE_WIN32_THREADS_INIT ) +endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_WIN32_THREADS_INIT ) -if(USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT ) +if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT ) add_definitions( -DMUTEX_pthread) -endif(USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT ) +endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT ) -if(USE_THREAD AND NOT Threads_FOUND) - message(FATAL_ERROR "No thread library found and thread/mutex support is required by USE_THREAD option") -endif(USE_THREAD AND NOT Threads_FOUND) +if(OPJ_USE_THREAD AND NOT Threads_FOUND) + message(FATAL_ERROR "No thread library found and thread/mutex support is required by OPJ_USE_THREAD option") +endif(OPJ_USE_THREAD AND NOT Threads_FOUND) -if(USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) +if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT}) -endif(USE_THREAD AND Threads_FOUND AND 
CMAKE_USE_PTHREADS_INIT) +endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) From 31d44f02807a6ce02feead4326e58fac9681c045 Mon Sep 17 00:00:00 2001 From: Antonin Descampe Date: Tue, 20 Sep 2016 18:48:06 +0200 Subject: [PATCH 33/33] Disable automatic compilation of t1_generate_luts Fix #831 --- CMakeLists.txt | 3 ++- src/lib/openjp2/CMakeLists.txt | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b187a13..f55be537 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -226,7 +226,7 @@ CHECK_INCLUDE_FILE("unistd.h" HAVE_UNISTD_H) include(TestLargeFiles) OPJ_TEST_LARGE_FILES(OPJ_HAVE_LARGEFILES) -# Allocating Aligned Memory Blocks +# Allocating Aligned Memory Blocks include(CheckIncludeFiles) check_include_files(malloc.h OPJ_HAVE_MALLOC_H) include(CheckSymbolExists) @@ -249,6 +249,7 @@ if(BUILD_JPIP_SERVER) endif() endif() add_subdirectory(src/lib) +option(BUILD_LUTS_GENERATOR "Build utility to generate t1_luts.h" OFF) #----------------------------------------------------------------------------- # Build Applications diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt index 014be603..b36905c9 100644 --- a/src/lib/openjp2/CMakeLists.txt +++ b/src/lib/openjp2/CMakeLists.txt @@ -118,11 +118,13 @@ install( DESTINATION ${OPENJPEG_INSTALL_MAN_DIR}/man3) endif() -# internal utilities to generate t1_luts.h (part of the jp2 lib) +if(BUILD_LUTS_GENERATOR) +# internal utility to generate t1_luts.h (part of the jp2 lib) # no need to install: -add_executable(t1_generate_luts t1_generate_luts.c) -if(UNIX) - target_link_libraries(t1_generate_luts m) + add_executable(t1_generate_luts t1_generate_luts.c) + if(UNIX) + target_link_libraries(t1_generate_luts m) + endif() endif() # Experimental option; let's how cppcheck performs