Significant optimizations of MCT, DWT, MQ and T1 modules by Peter Wimmer (thanks Peter)

This commit is contained in:
Francois-Olivier Devaux 2010-04-08 17:22:58 +00:00
parent d03779ee2a
commit 627f484bce
9 changed files with 612 additions and 86 deletions

View File

@ -5,6 +5,9 @@ What's New for OpenJPEG
! : changed
+ : added
April 8, 2010
! [FOD] Significant optimizations of MCT, DWT, MQ and T1 modules by Peter Wimmer (thanks Peter)
March 26, 2010
+ [FOD] Added support for "jpc" codestreams which are equivalent to "j2c" codestreams. Thanks to Winfried for this patch
* [FOD] Added support for PNG image format [Not yet functional under WIN32]. Thanks to Winfried for this patch. See details here http://code.google.com/p/openjpeg/issues/detail?id=16

View File

@ -570,6 +570,20 @@ static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, in
int count = w->sn;
int i, k;
for(k = 0; k < 2; ++k){
if (count + 3 * x < size && ((int) a & 0x0f) == 0 && ((int) bi & 0x0f) == 0 && (x & 0x0f) == 0) {
/* Fast code path */
for(i = 0; i < count; ++i){
int j = i;
bi[i*8 ] = a[j];
j += x;
bi[i*8 + 1] = a[j];
j += x;
bi[i*8 + 2] = a[j];
j += x;
bi[i*8 + 3] = a[j];
}
} else {
/* Slow code path */
for(i = 0; i < count; ++i){
int j = i;
bi[i*8 ] = a[j];
@ -583,6 +597,7 @@ static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, in
if(j > size) continue;
bi[i*8 + 3] = a[j];
}
}
bi = (float*) (w->wavelet + 1 - w->cas);
a += w->sn;
size -= w->sn;
@ -608,9 +623,21 @@ static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x){
static void v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){
__m128* restrict vw = (__m128*) w;
int i;
/* 4x unrolled loop */
for(i = 0; i < count >> 2; ++i){
*vw = _mm_mul_ps(*vw, c);
vw += 2;
*vw = _mm_mul_ps(*vw, c);
vw += 2;
*vw = _mm_mul_ps(*vw, c);
vw += 2;
*vw = _mm_mul_ps(*vw, c);
vw += 2;
}
count &= 3;
for(i = 0; i < count; ++i){
__m128 tmp = vw[i*2];
vw[i*2] = tmp * c;
*vw = _mm_mul_ps(*vw, c);
vw += 2;
}
}
@ -618,22 +645,24 @@ static void v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){
__m128* restrict vl = (__m128*) l;
__m128* restrict vw = (__m128*) w;
int i;
__m128 tmp1, tmp2, tmp3;
tmp1 = vl[0];
for(i = 0; i < m; ++i){
__m128 tmp1 = vl[ 0];
__m128 tmp2 = vw[-1];
__m128 tmp3 = vw[ 0];
vw[-1] = tmp2 + ((tmp1 + tmp3) * c);
vl = vw;
tmp2 = vw[-1];
tmp3 = vw[ 0];
vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
tmp1 = tmp3;
vw += 2;
}
vl = vw - 2;
if(m >= k){
return;
}
c += c;
c *= vl[0];
c = _mm_add_ps(c, c);
c = _mm_mul_ps(c, vl[0]);
for(; m < k; ++m){
__m128 tmp = vw[-1];
vw[-1] = tmp + c;
vw[-1] = _mm_add_ps(tmp, c);
vw += 2;
}
}
@ -773,19 +802,24 @@ void dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){
h.dn = rw - h.sn;
h.cas = res->x0 % 2;
for(j = rh; j > 0; j -= 4){
for(j = rh; j > 3; j -= 4){
int k;
v4dwt_interleave_h(&h, aj, w, bufsize);
v4dwt_decode(&h);
if(j >= 4){
int k;
for(k = rw; --k >= 0;){
aj[k ] = h.wavelet[k].f[0];
aj[k+w ] = h.wavelet[k].f[1];
aj[k+w*2] = h.wavelet[k].f[2];
aj[k+w*3] = h.wavelet[k].f[3];
}
}else{
aj += w*4;
bufsize -= w*4;
}
if (rh & 0x03) {
int k;
j = rh & 0x03;
v4dwt_interleave_h(&h, aj, w, bufsize);
v4dwt_decode(&h);
for(k = rw; --k >= 0;){
switch(j) {
case 3: aj[k+w*2] = h.wavelet[k].f[2];
@ -794,30 +828,29 @@ void dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){
}
}
}
aj += w*4;
bufsize -= w*4;
}
v.dn = rh - v.sn;
v.cas = res->y0 % 2;
aj = (float*) tilec->data;
for(j = rw; j > 0; j -= 4){
for(j = rw; j > 3; j -= 4){
int k;
v4dwt_interleave_v(&v, aj, w);
v4dwt_decode(&v);
if(j >= 4){
int k;
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(float));
}
}else{
aj += 4;
}
if (rw & 0x03){
int k;
j = rw & 0x03;
v4dwt_interleave_v(&v, aj, w);
v4dwt_decode(&v);
for(k = 0; k < rh; ++k){
memcpy(&aj[k*w], &v.wavelet[k], j * sizeof(float));
}
}
aj += 4;
}
}
opj_aligned_free(h.wavelet);

View File

@ -45,12 +45,12 @@ The functions in J2K.C have for goal to read/write the several parts of the code
#define J2K_CP_CSTY_SOP 0x02
#define J2K_CP_CSTY_EPH 0x04
#define J2K_CCP_CSTY_PRT 0x01
#define J2K_CCP_CBLKSTY_LAZY 0x01
#define J2K_CCP_CBLKSTY_RESET 0x02
#define J2K_CCP_CBLKSTY_TERMALL 0x04
#define J2K_CCP_CBLKSTY_VSC 0x08
#define J2K_CCP_CBLKSTY_PTERM 0x10
#define J2K_CCP_CBLKSTY_SEGSYM 0x20
#define J2K_CCP_CBLKSTY_LAZY 0x01 /**< Selective arithmetic coding bypass */
#define J2K_CCP_CBLKSTY_RESET 0x02 /**< Reset context probabilities on coding pass boundaries */
#define J2K_CCP_CBLKSTY_TERMALL 0x04 /**< Termination on each coding pass */
#define J2K_CCP_CBLKSTY_VSC 0x08 /**< Vertically stripe causal context */
#define J2K_CCP_CBLKSTY_PTERM 0x10 /**< Predictable termination */
#define J2K_CCP_CBLKSTY_SEGSYM 0x20 /**< Segmentation symbols are used */
#define J2K_CCP_QNTSTY_NOQNT 0
#define J2K_CCP_QNTSTY_SIQNT 1
#define J2K_CCP_QNTSTY_SEQNT 2

View File

@ -29,6 +29,10 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef __SSE__
#include <xmmintrin.h>
#endif
#include "opj_includes.h"
/* <summary> */
@ -127,6 +131,44 @@ void mct_decode_real(
int n)
{
int i;
#ifdef __SSE__
__m128 vrv, vgu, vgv, vbu;
vrv = _mm_set1_ps(1.402f);
vgu = _mm_set1_ps(0.34413f);
vgv = _mm_set1_ps(0.71414f);
vbu = _mm_set1_ps(1.772f);
for (i = 0; i < (n >> 3); ++i) {
__m128 vy, vu, vv;
__m128 vr, vg, vb;
vy = _mm_load_ps(c0);
vu = _mm_load_ps(c1);
vv = _mm_load_ps(c2);
vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
_mm_store_ps(c0, vr);
_mm_store_ps(c1, vg);
_mm_store_ps(c2, vb);
c0 += 4;
c1 += 4;
c2 += 4;
vy = _mm_load_ps(c0);
vu = _mm_load_ps(c1);
vv = _mm_load_ps(c2);
vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
_mm_store_ps(c0, vr);
_mm_store_ps(c1, vg);
_mm_store_ps(c2, vb);
c0 += 4;
c1 += 4;
c2 += 4;
}
n &= 7;
#endif
for(i = 0; i < n; ++i) {
float y = c0[i];
float u = c1[i];

View File

@ -68,24 +68,23 @@ FIXME: documentation ???
@param mqc MQC handle
@return
*/
static int mqc_mpsexchange(opj_mqc_t *mqc);
static INLINE int mqc_mpsexchange(opj_mqc_t *const mqc);
/**
FIXME: documentation ???
@param mqc MQC handle
@return
*/
static int mqc_lpsexchange(opj_mqc_t *mqc);
static INLINE int mqc_lpsexchange(opj_mqc_t *const mqc);
/**
Input a byte
@param mqc MQC handle
*/
static void mqc_bytein(opj_mqc_t *mqc);
static INLINE void mqc_bytein(opj_mqc_t *const mqc);
/**
Renormalize mqc->a and mqc->c while decoding
@param mqc MQC handle
*/
static void mqc_renormd(opj_mqc_t *mqc);
static INLINE void mqc_renormd(opj_mqc_t *const mqc);
/*@}*/
/*@}*/
@ -271,7 +270,7 @@ static void mqc_setbits(opj_mqc_t *mqc) {
}
}
static int mqc_mpsexchange(opj_mqc_t *mqc) {
static INLINE int mqc_mpsexchange(opj_mqc_t *const mqc) {
int d;
if (mqc->a < (*mqc->curctx)->qeval) {
d = 1 - (*mqc->curctx)->mps;
@ -284,7 +283,7 @@ static int mqc_mpsexchange(opj_mqc_t *mqc) {
return d;
}
static int mqc_lpsexchange(opj_mqc_t *mqc) {
static INLINE int mqc_lpsexchange(opj_mqc_t *const mqc) {
int d;
if (mqc->a < (*mqc->curctx)->qeval) {
mqc->a = (*mqc->curctx)->qeval;
@ -299,7 +298,15 @@ static int mqc_lpsexchange(opj_mqc_t *mqc) {
return d;
}
static void mqc_bytein(opj_mqc_t *mqc) {
#ifdef MQC_PERF_OPT
/*
 * Byte input, MQC_PERF_OPT variant: consumes one 32-bit word from the
 * buffer that mqc_init_dec prepares (mqc->bp points into mqc->buffer there).
 * The word is split into three fields, see the per-line notes below.
 */
static INLINE void mqc_bytein(opj_mqc_t *const mqc) {
	const unsigned int word = *((unsigned int *) mqc->bp);

	/* Bits 8..23 of the word are added to the code register. */
	mqc->c += word & 0xffff00;
	/* The low nibble becomes the new bit counter. */
	mqc->ct = word & 0x0f;
	/* Bit 4 of the word, shifted down to bit 2, advances bp by 0 or 4 bytes. */
	mqc->bp += (word >> 2) & 0x04;
}
#else
static void mqc_bytein(opj_mqc_t *const mqc) {
if (mqc->bp != mqc->end) {
unsigned int c;
if (mqc->bp + 1 != mqc->end) {
@ -326,8 +333,9 @@ static void mqc_bytein(opj_mqc_t *mqc) {
mqc->ct = 8;
}
}
#endif
static void mqc_renormd(opj_mqc_t *mqc) {
static INLINE void mqc_renormd(opj_mqc_t *const mqc) {
do {
if (mqc->ct == 0) {
mqc_bytein(mqc);
@ -346,11 +354,19 @@ static void mqc_renormd(opj_mqc_t *mqc) {
/*
 * Allocate a new MQ-coder handle.
 * Returns the handle, or NULL when the allocation fails.
 * With MQC_PERF_OPT the decoder-side buffer pointer starts out as NULL so
 * that mqc_destroy / mqc_init_dec can tell it has not been allocated yet.
 */
opj_mqc_t* mqc_create(void) {
	opj_mqc_t *mqc = (opj_mqc_t*)opj_malloc(sizeof(opj_mqc_t));
#ifdef MQC_PERF_OPT
	/* Only touch the structure if the allocation actually succeeded. */
	if (mqc) {
		mqc->buffer = NULL;
	}
#endif
	return mqc;
}
/*
 * Release an MQ-coder handle. A NULL handle is accepted and ignored.
 * With MQC_PERF_OPT the auxiliary buffer is freed first when one is present.
 */
void mqc_destroy(opj_mqc_t *mqc) {
	if (!mqc) {
		return;
	}
#ifdef MQC_PERF_OPT
	if (mqc->buffer) {
		opj_free(mqc->buffer);
	}
#endif
	opj_free(mqc);
}
@ -499,13 +515,52 @@ void mqc_init_dec(opj_mqc_t *mqc, unsigned char *bp, int len) {
mqc->bp = bp;
if (len==0) mqc->c = 0xff << 16;
else mqc->c = *mqc->bp << 16;
#ifdef MQC_PERF_OPT
{
unsigned int c;
unsigned int *ip;
unsigned char *end = mqc->end - 1;
mqc->buffer = opj_realloc(mqc->buffer, (2 * len + 1) * sizeof(unsigned int));
ip = (unsigned int *) mqc->buffer;
while (bp != end) {
c = *(bp + 1);
if (*bp == 0xff) {
if (c > 0x8f) {
*ip = 0x0000ff18;
} else {
bp++;
*ip = 0x00000017 | (c << 9);
}
} else {
bp++;
*ip = 0x00000018 | (c << 8);
}
ip++;
}
/* Handle last byte of data */
c = 0xff;
if (*bp == 0xff) {
*ip = 0x0000ff18;
} else {
bp++;
*ip = 0x00000018 | (c << 8);
}
ip++;
*ip = 0x0000ff08;
mqc->bp = mqc->buffer;
}
#endif
mqc_bytein(mqc);
mqc->c <<= 7;
mqc->ct -= 7;
mqc->a = 0x8000;
}
int mqc_decode(opj_mqc_t *mqc) {
INLINE int mqc_decode(opj_mqc_t *const mqc) {
int d;
mqc->a -= (*mqc->curctx)->qeval;
if ((mqc->c >> 16) < (*mqc->curctx)->qeval) {

View File

@ -70,6 +70,9 @@ typedef struct opj_mqc {
unsigned char *end;
opj_mqc_state_t *ctxs[MQC_NUMCTXS];
opj_mqc_state_t **curctx;
#ifdef MQC_PERF_OPT
unsigned char *buffer;
#endif
} opj_mqc_t;
/** @name Exported functions */
@ -188,7 +191,7 @@ Decode a symbol
@param mqc MQC handle
@return Returns the decoded symbol (0 or 1)
*/
int mqc_decode(opj_mqc_t *mqc);
int mqc_decode(opj_mqc_t *const mqc);
/* ----------------------------------------------------------------------- */
/*@}*/

View File

@ -65,7 +65,7 @@ Most compilers implement their own version of this keyword ...
*/
#ifndef INLINE
#if defined(_MSC_VER)
#define INLINE __inline
#define INLINE __forceinline
#elif defined(__GNUC__)
#define INLINE __inline__
#elif defined(__MWERKS__)

View File

@ -45,7 +45,11 @@ Allocate an uninitialized memory block
@param size Bytes to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
#ifdef ALLOC_PERF_OPT
void * OPJ_CALLCONV opj_malloc(size_t size);
#else
#define opj_malloc(size) malloc(size)
#endif
/**
Allocate a memory block with elements initialized to 0
@ -53,7 +57,11 @@ Allocate a memory block with elements initialized to 0
@param size Bytes per block to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
#ifdef ALLOC_PERF_OPT
void * OPJ_CALLCONV opj_calloc(size_t _NumOfElements, size_t _SizeOfElements);
#else
#define opj_calloc(num, size) calloc(num, size)
#endif
/**
Allocate memory aligned to a 16-byte boundary
@ -113,19 +121,34 @@ Allocate memory aligned to a 16 byte boundry
#define opj_aligned_free(m) free(m)
#endif
#ifdef ALLOC_PERF_OPT
#undef opj_aligned_malloc
#define opj_aligned_malloc(size) opj_malloc(size)
#undef opj_aligned_free
#define opj_aligned_free(m) opj_free(m)
#endif
/**
Reallocate memory blocks.
@param memblock Pointer to previously allocated memory block
@param size New size in bytes
@return Returns a void pointer to the reallocated (and possibly moved) memory block
*/
#ifdef ALLOC_PERF_OPT
void * OPJ_CALLCONV opj_realloc(void * _Memory, size_t NewSize);
#else
#define opj_realloc(m, s) realloc(m, s)
#endif
/**
Deallocates or frees a memory block.
@param memblock Previously allocated memory block to be freed
*/
#ifdef ALLOC_PERF_OPT
void OPJ_CALLCONV opj_free(void * _Memory);
#else
#define opj_free(m) free(m)
#endif
#ifdef __GNUC__
#pragma GCC poison malloc calloc realloc free

View File

@ -62,13 +62,25 @@ static void t1_enc_sigpass_step(
/**
Decode significant pass
*/
static void t1_dec_sigpass_step(
static INLINE void t1_dec_sigpass_step_raw(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf,
int vsc);
static INLINE void t1_dec_sigpass_step_mqc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf);
static INLINE void t1_dec_sigpass_step_mqc_vsc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf,
char type,
int vsc);
/**
Encode significant pass
@ -83,12 +95,19 @@ static void t1_enc_sigpass(
/**
Decode significant pass
*/
static void t1_dec_sigpass(
static void t1_dec_sigpass_raw(
opj_t1_t *t1,
int bpno,
int orient,
char type,
int cblksty);
static void t1_dec_sigpass_mqc(
opj_t1_t *t1,
int bpno,
int orient);
static void t1_dec_sigpass_mqc_vsc(
opj_t1_t *t1,
int bpno,
int orient);
/**
Encode refinement pass
*/
@ -104,14 +123,27 @@ static void t1_enc_refpass_step(
/**
Decode refinement pass
*/
static void t1_dec_refpass_step(
static void INLINE t1_dec_refpass_step_raw(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int poshalf,
int neghalf,
char type,
int vsc);
static void INLINE t1_dec_refpass_step_mqc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int poshalf,
int neghalf);
static void INLINE t1_dec_refpass_step_mqc_vsc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int poshalf,
int neghalf,
int vsc);
/**
Encode refinement pass
*/
@ -124,11 +156,16 @@ static void t1_enc_refpass(
/**
Decode refinement pass
*/
static void t1_dec_refpass(
static void t1_dec_refpass_raw(
opj_t1_t *t1,
int bpno,
char type,
int cblksty);
static void t1_dec_refpass_mqc(
opj_t1_t *t1,
int bpno);
static void t1_dec_refpass_mqc_vsc(
opj_t1_t *t1,
int bpno);
/**
Encode clean-up pass
*/
@ -145,7 +182,19 @@ static void t1_enc_clnpass_step(
/**
Decode clean-up pass
*/
static void t1_dec_clnpass_step_partial(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf);
static void t1_dec_clnpass_step(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf);
static void t1_dec_clnpass_step_vsc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
@ -323,29 +372,42 @@ static void t1_enc_sigpass_step(
}
}
static void t1_dec_sigpass_step(
static INLINE void t1_dec_sigpass_step_raw(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf,
char type,
int vsc)
{
int v, flag;
opj_raw_t *raw = t1->raw; /* RAW component */
opj_mqc_t *mqc = t1->mqc; /* MQC component */
flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp);
if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) {
if (type == T1_TYPE_RAW) {
if (raw_decode(raw)) {
v = raw_decode(raw); /* ESSAI */
*datap = v ? -oneplushalf : oneplushalf;
t1_updateflags(flagsp, v, t1->flags_stride);
}
} else {
*flagsp |= T1_VISIT;
}
} /* VSC and BYPASS by Antonin */
static INLINE void t1_dec_sigpass_step_mqc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf)
{
int v, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */
flag = *flagsp;
if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) {
mqc_setcurctx(mqc, t1_getctxno_zc(flag, orient));
if (mqc_decode(mqc)) {
mqc_setcurctx(mqc, t1_getctxno_sc(flag));
@ -353,6 +415,30 @@ static void t1_dec_sigpass_step(
*datap = v ? -oneplushalf : oneplushalf;
t1_updateflags(flagsp, v, t1->flags_stride);
}
*flagsp |= T1_VISIT;
}
} /* VSC and BYPASS by Antonin */
static INLINE void t1_dec_sigpass_step_mqc_vsc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int orient,
int oneplushalf,
int vsc)
{
int v, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */
flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp);
if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) {
mqc_setcurctx(mqc, t1_getctxno_zc(flag, orient));
if (mqc_decode(mqc)) {
mqc_setcurctx(mqc, t1_getctxno_sc(flag));
v = mqc_decode(mqc) ^ t1_getspb(flag);
*datap = v ? -oneplushalf : oneplushalf;
t1_updateflags(flagsp, v, t1->flags_stride);
}
*flagsp |= T1_VISIT;
}
@ -388,11 +474,10 @@ static void t1_enc_sigpass(
}
}
static void t1_dec_sigpass(
static void t1_dec_sigpass_raw(
opj_t1_t *t1,
int bpno,
int orient,
char type,
int cblksty)
{
int i, j, k, one, half, oneplushalf, vsc;
@ -403,13 +488,79 @@ static void t1_dec_sigpass(
for (i = 0; i < t1->w; ++i) {
for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0;
t1_dec_sigpass_step(
t1_dec_sigpass_step_raw(
t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1],
&t1->data[(j * t1->w) + i],
orient,
oneplushalf,
vsc);
}
}
}
} /* VSC and BYPASS by Antonin */
/*
 * Significance pass over the whole code-block, MQ-coded variant.
 * Rows are walked in stripes of four, column by column, with the four
 * per-row step calls written out explicitly; the remaining (h mod 4) rows
 * are handled by the trailing loop.
 */
static void t1_dec_sigpass_mqc(
		opj_t1_t *t1,
		int bpno,
		int orient)
{
	int row, col, tail;
	int one, oneplushalf;
	int *stripe_data = t1->data;
	/* Skip the left border column of the flags array. */
	flag_t *stripe_flags = &t1->flags[1];

	one = 1 << bpno;
	oneplushalf = one | (one >> 1);

	/* Complete stripes of four rows. */
	for (row = 0; row < (t1->h & ~3); row += 4) {
		for (col = 0; col < t1->w; ++col) {
			int *dp = stripe_data + col;
			/* Flags carry one extra row on top, hence the initial stride. */
			flag_t *fp = stripe_flags + col + t1->flags_stride;

			t1_dec_sigpass_step_mqc(t1, fp, dp, orient, oneplushalf);
			dp += t1->w;
			fp += t1->flags_stride;
			t1_dec_sigpass_step_mqc(t1, fp, dp, orient, oneplushalf);
			dp += t1->w;
			fp += t1->flags_stride;
			t1_dec_sigpass_step_mqc(t1, fp, dp, orient, oneplushalf);
			dp += t1->w;
			fp += t1->flags_stride;
			t1_dec_sigpass_step_mqc(t1, fp, dp, orient, oneplushalf);
		}
		stripe_data += t1->w << 2;
		stripe_flags += t1->flags_stride << 2;
	}

	/* Leftover rows of a final, partial stripe (row now holds its first index). */
	for (col = 0; col < t1->w; ++col) {
		int *dp = stripe_data + col;
		flag_t *fp = stripe_flags + col;

		for (tail = row; tail < t1->h; ++tail) {
			fp += t1->flags_stride;
			t1_dec_sigpass_step_mqc(t1, fp, dp, orient, oneplushalf);
			dp += t1->w;
		}
	}
} /* VSC and BYPASS by Antonin */
static void t1_dec_sigpass_mqc_vsc(
opj_t1_t *t1,
int bpno,
int orient)
{
int i, j, k, one, half, oneplushalf, vsc;
one = 1 << bpno;
half = one >> 1;
oneplushalf = one | half;
for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) {
for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0;
t1_dec_sigpass_step_mqc_vsc(
t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1],
&t1->data[(j * t1->w) + i],
orient,
oneplushalf,
type,
vsc);
}
}
@ -444,28 +595,64 @@ static void t1_enc_refpass_step(
}
}
static void t1_dec_refpass_step(
/*
 * Refinement-pass step for one coefficient, raw (bypass) variant.
 * When vsc is non-zero the south-neighbour bits are masked out of the
 * flags before the test. A coefficient that is significant but not yet
 * visited gets one raw-decoded bit: poshalf or neghalf is applied away
 * from or towards zero depending on the sign of *datap, and T1_REFINE is set.
 */
static INLINE void t1_dec_refpass_step_raw(
		opj_t1_t *t1,
		flag_t *flagsp,
		int *datap,
		int poshalf,
		int neghalf,
		int vsc)
{
	opj_raw_t *raw = t1->raw;	/* RAW component */
	int flag = *flagsp;
	int bit, delta;

	if (vsc) {
		flag &= ~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S);
	}
	if ((flag & (T1_SIG | T1_VISIT)) != T1_SIG) {
		return;
	}

	bit = raw_decode(raw);
	delta = bit ? poshalf : neghalf;
	if (*datap < 0) {
		*datap -= delta;
	} else {
		*datap += delta;
	}
	*flagsp |= T1_REFINE;
} /* VSC and BYPASS by Antonin */
/*
 * Refinement-pass step for one coefficient, MQ-coded variant without
 * vertical-causal masking. A coefficient that is significant but not yet
 * visited gets one bit decoded in the magnitude context: poshalf or
 * neghalf is applied according to the sign of *datap, and T1_REFINE is set.
 */
static INLINE void t1_dec_refpass_step_mqc(
		opj_t1_t *t1,
		flag_t *flagsp,
		int *datap,
		int poshalf,
		int neghalf)
{
	opj_mqc_t *mqc = t1->mqc;	/* MQC component */
	int flag = *flagsp;
	int bit, delta;

	if ((flag & (T1_SIG | T1_VISIT)) != T1_SIG) {
		return;
	}

	mqc_setcurctx(mqc, t1_getctxno_mag(flag));	/* ESSAI */
	bit = mqc_decode(mqc);
	delta = bit ? poshalf : neghalf;
	if (*datap < 0) {
		*datap -= delta;
	} else {
		*datap += delta;
	}
	*flagsp |= T1_REFINE;
} /* VSC and BYPASS by Antonin */
static INLINE void t1_dec_refpass_step_mqc_vsc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
int poshalf,
int neghalf,
char type,
int vsc)
{
int v, t, flag;
opj_mqc_t *mqc = t1->mqc; /* MQC component */
opj_raw_t *raw = t1->raw; /* RAW component */
flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) : (*flagsp);
if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) {
mqc_setcurctx(mqc, t1_getctxno_mag(flag)); /* ESSAI */
if (type == T1_TYPE_RAW) {
v = raw_decode(raw);
} else {
v = mqc_decode(mqc);
}
t = v ? poshalf : neghalf;
*datap += *datap < 0 ? -t : t;
*flagsp |= T1_REFINE;
@ -500,10 +687,9 @@ static void t1_enc_refpass(
}
}
static void t1_dec_refpass(
static void t1_dec_refpass_raw(
opj_t1_t *t1,
int bpno,
char type,
int cblksty)
{
int i, j, k, one, poshalf, neghalf;
@ -515,13 +701,78 @@ static void t1_dec_refpass(
for (i = 0; i < t1->w; ++i) {
for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0;
t1_dec_refpass_step(
t1_dec_refpass_step_raw(
t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1],
&t1->data[(j * t1->w) + i],
poshalf,
neghalf,
vsc);
}
}
}
} /* VSC and BYPASS by Antonin */
/*
 * Refinement pass over the whole code-block, MQ-coded variant.
 * Rows are walked in stripes of four, column by column, with the four
 * per-row step calls written out explicitly; the remaining (h mod 4) rows
 * are handled by the trailing loop.
 */
static void t1_dec_refpass_mqc(
		opj_t1_t *t1,
		int bpno)
{
	int row, col, tail;
	int one, poshalf, neghalf;
	int *stripe_data = t1->data;
	/* Skip the left border column of the flags array. */
	flag_t *stripe_flags = &t1->flags[1];

	one = 1 << bpno;
	poshalf = one >> 1;
	if (bpno > 0) {
		neghalf = -poshalf;
	} else {
		neghalf = -1;
	}

	/* Complete stripes of four rows. */
	for (row = 0; row < (t1->h & ~3); row += 4) {
		for (col = 0; col < t1->w; ++col) {
			int *dp = stripe_data + col;
			/* Flags carry one extra row on top, hence the initial stride. */
			flag_t *fp = stripe_flags + col + t1->flags_stride;

			t1_dec_refpass_step_mqc(t1, fp, dp, poshalf, neghalf);
			dp += t1->w;
			fp += t1->flags_stride;
			t1_dec_refpass_step_mqc(t1, fp, dp, poshalf, neghalf);
			dp += t1->w;
			fp += t1->flags_stride;
			t1_dec_refpass_step_mqc(t1, fp, dp, poshalf, neghalf);
			dp += t1->w;
			fp += t1->flags_stride;
			t1_dec_refpass_step_mqc(t1, fp, dp, poshalf, neghalf);
		}
		stripe_data += t1->w << 2;
		stripe_flags += t1->flags_stride << 2;
	}

	/* Leftover rows of a final, partial stripe (row now holds its first index). */
	for (col = 0; col < t1->w; ++col) {
		int *dp = stripe_data + col;
		flag_t *fp = stripe_flags + col;

		for (tail = row; tail < t1->h; ++tail) {
			fp += t1->flags_stride;
			t1_dec_refpass_step_mqc(t1, fp, dp, poshalf, neghalf);
			dp += t1->w;
		}
	}
} /* VSC and BYPASS by Antonin */
static void t1_dec_refpass_mqc_vsc(
opj_t1_t *t1,
int bpno)
{
int i, j, k, one, poshalf, neghalf;
int vsc;
one = 1 << bpno;
poshalf = one >> 1;
neghalf = bpno > 0 ? -poshalf : -1;
for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) {
for (j = k; j < k + 4 && j < t1->h; ++j) {
vsc = ((j == k + 3 || j == t1->h - 1)) ? 1 : 0;
t1_dec_refpass_step_mqc_vsc(
t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1],
&t1->data[(j * t1->w) + i],
poshalf,
neghalf,
type,
vsc);
}
}
@ -563,7 +814,50 @@ LABEL_PARTIAL:
*flagsp &= ~T1_VISIT;
}
/*
 * Clean-up pass step that goes straight to sign decoding: no
 * zero-coding decision is decoded here. The sign bit is decoded in the
 * sign context, XORed with the predicted sign bit, the coefficient is set
 * to +/- oneplushalf, neighbour flags are updated and T1_VISIT is cleared.
 * The orient argument is not used by this variant.
 */
static void t1_dec_clnpass_step_partial(
		opj_t1_t *t1,
		flag_t *flagsp,
		int *datap,
		int orient,
		int oneplushalf)
{
	opj_mqc_t *mqc = t1->mqc;	/* MQC component */
	int flag = *flagsp;
	int negative;

	mqc_setcurctx(mqc, t1_getctxno_sc(flag));
	negative = mqc_decode(mqc) ^ t1_getspb(flag);
	if (negative) {
		*datap = -oneplushalf;
	} else {
		*datap = oneplushalf;
	}
	t1_updateflags(flagsp, negative, t1->flags_stride);
	*flagsp &= ~T1_VISIT;
} /* VSC and BYPASS by Antonin */
/*
 * Clean-up pass step for one coefficient, variant without
 * vertical-causal masking. A coefficient that is neither significant nor
 * visited gets a zero-coding decision; if that decodes to 1, the sign is
 * decoded, the coefficient is set to +/- oneplushalf and the neighbour
 * flags are updated. T1_VISIT is cleared in every case.
 */
static void t1_dec_clnpass_step(
		opj_t1_t *t1,
		flag_t *flagsp,
		int *datap,
		int orient,
		int oneplushalf)
{
	opj_mqc_t *mqc = t1->mqc;	/* MQC component */
	int flag = *flagsp;
	int negative;

	if ((flag & (T1_SIG | T1_VISIT)) == 0) {
		mqc_setcurctx(mqc, t1_getctxno_zc(flag, orient));
		if (mqc_decode(mqc)) {
			mqc_setcurctx(mqc, t1_getctxno_sc(flag));
			negative = mqc_decode(mqc) ^ t1_getspb(flag);
			if (negative) {
				*datap = -oneplushalf;
			} else {
				*datap = oneplushalf;
			}
			t1_updateflags(flagsp, negative, t1->flags_stride);
		}
	}
	*flagsp &= ~T1_VISIT;
} /* VSC and BYPASS by Antonin */
static void t1_dec_clnpass_step_vsc(
opj_t1_t *t1,
flag_t *flagsp,
int *datap,
@ -591,7 +885,7 @@ LABEL_PARTIAL:
}
}
*flagsp &= ~T1_VISIT;
} /* VSC and BYPASS by Antonin */
}
static void t1_enc_clnpass(
opj_t1_t *t1,
@ -671,21 +965,15 @@ static void t1_dec_clnpass(
one = 1 << bpno;
half = one >> 1;
oneplushalf = one | half;
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
for (k = 0; k < t1->h; k += 4) {
for (i = 0; i < t1->w; ++i) {
if (k + 3 < t1->h) {
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
agg = !(MACRO_t1_flags(1 + k,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 1,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 2,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| (MACRO_t1_flags(1 + k + 3,1 + i)
& (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT | T1_SIG_OTH));
} else {
agg = !(MACRO_t1_flags(1 + k,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 1,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 2,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 3,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH));
}
} else {
agg = 0;
}
@ -701,8 +989,8 @@ static void t1_dec_clnpass(
runlen = 0;
}
for (j = k + runlen; j < k + 4 && j < t1->h; ++j) {
vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 || j == t1->h - 1)) ? 1 : 0;
t1_dec_clnpass_step(
vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0;
t1_dec_clnpass_step_vsc(
t1,
&t1->flags[((j+1) * t1->flags_stride) + i + 1],
&t1->data[(j * t1->w) + i],
@ -713,6 +1001,65 @@ static void t1_dec_clnpass(
}
}
}
} else {
int *data1 = t1->data;
flag_t *flags1 = &t1->flags[1];
for (k = 0; k < (t1->h & ~3); k += 4) {
for (i = 0; i < t1->w; ++i) {
int *data2 = data1 + i;
flag_t *flags2 = flags1 + i;
agg = !(MACRO_t1_flags(1 + k,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 1,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 2,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
|| MACRO_t1_flags(1 + k + 3,1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH));
if (agg) {
mqc_setcurctx(mqc, T1_CTXNO_AGG);
if (!mqc_decode(mqc)) {
continue;
}
mqc_setcurctx(mqc, T1_CTXNO_UNI);
runlen = mqc_decode(mqc);
runlen = (runlen << 1) | mqc_decode(mqc);
flags2 += runlen * t1->flags_stride;
data2 += runlen * t1->w;
for (j = k + runlen; j < k + 4 && j < t1->h; ++j) {
flags2 += t1->flags_stride;
if (agg && (j == k + runlen)) {
t1_dec_clnpass_step_partial(t1, flags2, data2, orient, oneplushalf);
} else {
t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf);
}
data2 += t1->w;
}
} else {
flags2 += t1->flags_stride;
t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf);
data2 += t1->w;
flags2 += t1->flags_stride;
t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf);
data2 += t1->w;
flags2 += t1->flags_stride;
t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf);
data2 += t1->w;
flags2 += t1->flags_stride;
t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf);
data2 += t1->w;
}
}
data1 += t1->w << 2;
flags1 += t1->flags_stride << 2;
}
for (i = 0; i < t1->w; ++i) {
int *data2 = data1 + i;
flag_t *flags2 = flags1 + i;
for (j = k; j < t1->h; ++j) {
flags2 += t1->flags_stride;
t1_dec_clnpass_step(t1, flags2, data2, orient, oneplushalf);
data2 += t1->w;
}
}
}
if (segsym) {
int v = 0;
mqc_setcurctx(mqc, T1_CTXNO_UNI);
@ -975,10 +1322,26 @@ static void t1_decode_cblk(
for (passno = 0; passno < seg->numpasses; ++passno) {
switch (passtype) {
case 0:
t1_dec_sigpass(t1, bpno+1, orient, type, cblksty);
if (type == T1_TYPE_RAW) {
t1_dec_sigpass_raw(t1, bpno+1, orient, cblksty);
} else {
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
t1_dec_sigpass_mqc_vsc(t1, bpno+1, orient);
} else {
t1_dec_sigpass_mqc(t1, bpno+1, orient);
}
}
break;
case 1:
t1_dec_refpass(t1, bpno+1, type, cblksty);
if (type == T1_TYPE_RAW) {
t1_dec_refpass_raw(t1, bpno+1, cblksty);
} else {
if (cblksty & J2K_CCP_CBLKSTY_VSC) {
t1_dec_refpass_mqc_vsc(t1, bpno+1);
} else {
t1_dec_refpass_mqc(t1, bpno+1);
}
}
break;
case 2:
t1_dec_clnpass(t1, bpno+1, orient, cblksty);
@ -1145,7 +1508,6 @@ void t1_decode_cblks(
for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
int* restrict datap;
void* restrict tiledp;
int cblk_w, cblk_h;
int x, y;
int i, j;
@ -1186,8 +1548,8 @@ void t1_decode_cblks(
}
}
tiledp=(void*)&tilec->data[(y * tile_w) + x];
if (tccp->qmfbid == 1) {
int* restrict tiledp = &tilec->data[(y * tile_w) + x];
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
int tmp = datap[(j * cblk_w) + i];
@ -1195,11 +1557,16 @@ void t1_decode_cblks(
}
}
} else { /* if (tccp->qmfbid == 0) */
float* restrict tiledp = (float*) &tilec->data[(y * tile_w) + x];
for (j = 0; j < cblk_h; ++j) {
float* restrict tiledp2 = tiledp;
for (i = 0; i < cblk_w; ++i) {
float tmp = datap[(j * cblk_w) + i] * band->stepsize;
((float*)tiledp)[(j * tile_w) + i] = tmp;
float tmp = *datap * band->stepsize;
*tiledp2 = tmp;
datap++;
tiledp2++;
}
tiledp += tile_w;
}
}
opj_free(cblk->data);