diff --git a/ChangeLog b/ChangeLog index 74775c20..09fa017f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,14 +5,21 @@ What's New for OpenJPEG ! : changed + : added +November 13, 2007 +! [FOD] Patch by Dzonatas and Callum Lerwick. + Fp/vectorization patch which basically converts most of the irreversible decode codepath to floating point, + eliminating a few rounds of int/fp conversion, resulting in a vast performance improvement, + and an increase in accuracy. + November 8, 2007 ! [FOD] In t1.c, small change to avoid calling twice t1_getwmsedec() - Patches from Callum Lewick: - - Basic gcc optimization flags in cmake and makefile match. - - Fixed some spelling errors in dwt.c. + Patches from Callum Lewick: + - Basic gcc optimization flags in cmake and makefile match. + - Fixed some spelling errors in dwt.c. November 5, 2007 -*+ [GB] Fixed a bug which prevented JPWL from working on multi-tiled images; added some more fields in the interface info structures (keep a list of markers, save start packet number for each tile) +*+ [GB] Fixed a bug which prevented JPWL from working on multi-tiled images; added some more fields in the interface info structures +(keep a list of markers, save start packet number for each tile) October 23, 2007 * [GB] Improved success for the linux build; OPJViewer shows all the COM contents @@ -39,7 +46,8 @@ October 12, 2007 October 10, 2007 * [FOD] Patch from Callum Lewick. Clean up of j2klib.h for the aligned malloc stuff. - It makes it work right with mingw, as _mm_malloc() isn't a macro, attempts to pave the way to using cmake to check for this stuff and combines a patch from Dana Fagerstrom at Sun that makes it use memalign() on Solaris + It makes it work right with mingw, as _mm_malloc() isn't a macro, attempts to pave the way to using cmake + to check for this stuff and combines a patch from Dana Fagerstrom at Sun that makes it use memalign() on Solaris convert.c: Changed some error comments for TIFF images September 27, 2007 @@ -91,10 +99,12 @@ September 6, 2007 * [Mathieu Malaterre] Fix unitialized read in img_fol (we may need a smarter initialize than memset) September 4, 2007 -+ [GB] Added some fields in the codestream_info structure: they are used to record the position of single tile parts. Changed also the write_index function in the codec, to reflect the presence of this new information. ++ [GB] Added some fields in the codestream_info structure: they are used to record the position of single tile parts. + Changed also the write_index function in the codec, to reflect the presence of this new information. September 3, 2007 -+ [GB] Added the knowledge of JPSEC SEC and INSEC markers (you have to compile the JPWL project). Management of these markers is limited to skipping them without crashing: no real security function at this stage. Deprecated USE_JPSEC will be removed next ++ [GB] Added the knowledge of JPSEC SEC and INSEC markers (you have to compile the JPWL project). Management of these markers is limited to skipping them without crashing: + no real security function at this stage. Deprecated USE_JPSEC will be removed next August 31, 2007 * [GB] Fixed save capabilities in OPJViewer due to recent code upgrade @@ -146,12 +156,15 @@ July 17, 2007 + [FOD] Added support for RAW images. This module has been developped by the University of Perugia team. Thanks to them ! [image_to_j2k.c j2k_to_image.c convert.c convert.h] July 13, 2007 -! [FOD] Modified the memory allocation for codestreams containing multiple tiles. The memory is now allocated for each tile indenpendently, leading to an important decrease of the virtual memory needed. [j2k.c tcd.h tcd.c] +! [FOD] Modified the memory allocation for codestreams containing multiple tiles. The memory is now allocated for each tile indenpendently, + leading to an important decrease of the virtual memory needed. [j2k.c tcd.h tcd.c] ! [FOD] Modified old comments about the ability to decode mega-images and comments about the disk size necessary to do this. [image_to_j2k.c and frames_to_mj2.c] * [FOD] Added 2000 bytes for the memory allocation in cio.c for the minimum size of headers (useful in case of very small images) [cio.c] July 12, 2007 -* [GB] fixed a bug in JPWL module, which prevented to exploit the full error correction capability of RS codes (e.g. it gave up at 5 errors, even if 6 were correctable); defined a JPWL_MAXIMUM_EPB_ROOM for better customization of the maximum dimension of EPBs (the dimension is pre-calculated on an hypothesis, if it goes beyond 65535 there will be problems, thus we give a little less than the max, let's say 65450) +* [GB] fixed a bug in JPWL module, which prevented to exploit the full error correction capability of RS codes (e.g. it gave up at 5 errors, + even if 6 were correctable); defined a JPWL_MAXIMUM_EPB_ROOM for better customization of the maximum dimension of EPBs (the dimension + is pre-calculated on an hypothesis, if it goes beyond 65535 there will be problems, thus we give a little less than the max, let's say 65450) July 8, 2007 * [ANTONIN] fixed the size of the memory allocation in cio.c (confusion between bits and bytes) @@ -186,13 +199,16 @@ May 31, 2007 * [FOD] Fixed the parameters used for cinema compression (9-7 transform used instead of 5-3). Modified "image_to_j2k.c" May 24, 2007 -* [FOD] Bug fixed by Sylvain Munaut. Change in the reading of the POC marker. Since COD/COC can be anywhere in the header, the decoder cannot always know while decoding the POC marker the value of numlayers and numresolution. +* [FOD] Bug fixed by Sylvain Munaut. Change in the reading of the POC marker. Since COD/COC can be anywhere in the header, the decoder cannot always know while decoding the POC marker + the value of numlayers and numresolution. May 23, 2007 ! [FOD] Patch suggested by Callum Lerwick : "This makes the t1 data arrays dynamic, which greatly reduces cache thrashing. Also, some minor cleanup to prevent unnecessary casts" May 22, 2007 -! [FOD] Patch suggested by Callum Lerwick : "Some formatting cleanups, so that the long function definitions and calls fit on screen. Use of prefix increment which is theoretically faster, in practice any sane compiler can optimize a postfix increment but its best not to count on such things. Consolidation of some redundant calculations in the inner loops, which becomes very useful in the future autovectorize patch." +! [FOD] Patch suggested by Callum Lerwick : "Some formatting cleanups, + so that the long function definitions and calls fit on screen. Use of prefix increment which is theoretically faster, in practice any sane compiler can optimize a postfix + increment but its best not to count on such things. Consolidation of some redundant calculations in the inner loops, which becomes very useful in the future autovectorize patch." ! [FOD] Patch suggested by Callum Lerwick : "This changes the flag code in t1 to use a flag_t type, which can then be changed to reduce RAM usage. It is now typedef to a short." ! [FOD] Patch suggested by Callum Lerwick : "This patch makes the t1 LUTs static. I actually intend this as a prelude to possibly eliminating some or all of the LUTs entirely." @@ -241,14 +257,17 @@ March 29, 2007 * [Parvatha] Equation to check multiple tile precincts. Modification pi.c ! [Parvatha] array size generation of pi->include in pi_initialise_encode().Modification in pi.c * [Parvatha] modification in pi_create_encode for tile part generation.Modification in pi.c -+ [Parvatha] In tcd_rateallocate a variable stable_threshold which holds the valid threshold value. This is used to avoid error in case of a wrong threshold value in the last iteration. Modification in tcd.c. ++ [Parvatha] In tcd_rateallocate a variable stable_threshold which holds the valid threshold value. + This is used to avoid error in case of a wrong threshold value in the last iteration. Modification in tcd.c. March 28, 2007 * [FOD] Fixed an historical bug in t1.c that leaded to the inclusion of useless 0xFF in the codestream. Thanks to Sylvain, Pascal and Parvatha ! March 27, 2007 -+ [GB] Improved parsing in OPJViewer, as well some minor aesthetic modifications; support for image rendering with bit depths lower than 8 bits; can display an arbitrary frame of an MJ2 file (only in B/W, though); can reload a file; better resizing capabilities -* [GB] Following to Hervé's suggestions, all the exit() calls, added by JPWL strict checking in t2.c and j2k.c, have been substituted with (object free'ing + opj_evt_message(EVT_ERROR) + return) ++ [GB] Improved parsing in OPJViewer, as well some minor aesthetic modifications; support for image rendering with bit depths lower than 8 bits; + can display an arbitrary frame of an MJ2 file (only in B/W, though); can reload a file; better resizing capabilities +* [GB] Following to Hervé's suggestions, all the exit() calls, added by JPWL strict checking in t2.c and j2k.c, + have been substituted with (object free'ing + opj_evt_message(EVT_ERROR) + return) + [GB] Added linking to TIFF library in the JPWL VC6 workspaces March 23, 2007 @@ -294,7 +313,8 @@ VERSION 1.1.1 RELEASED ---------------------- February 23, 2007 -* [GB] Fixed a copy-and-paste type assignment error (bool instead of int) in the JPWL section of decoder parameters structure in openjpeg.h; minor type-casting in jpwl_lib.c. As a result, now OPJViewer should run correctly when built against the most current SVN trunk of LibOpenJPEG.lib +* [GB] Fixed a copy-and-paste type assignment error (bool instead of int) in the JPWL section of decoder parameters structure in openjpeg.h; minor type-casting in jpwl_lib.c. + As a result, now OPJViewer should run correctly when built against the most current SVN trunk of LibOpenJPEG.lib + [GB] Linux makefile for the JPWL module; newlines at end of JPWL files February 22, 2007 diff --git a/libopenjpeg/dwt.c b/libopenjpeg/dwt.c index 161512aa..f4f069df 100644 --- a/libopenjpeg/dwt.c +++ b/libopenjpeg/dwt.c @@ -5,6 +5,8 @@ * Copyright (c) 2002-2003, Yannick Verschueren * Copyright (c) 2003-2007, Francois-Olivier Devaux and Antonin Descampe * Copyright (c) 2005, Herve Drolon, FreeImage Team + * Copyright (c) 2007, Jonathan Ballard + * Copyright (c) 2007, Callum Lerwick * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,6 +31,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#ifdef __SSE__ +#include +#endif #include "opj_includes.h" @@ -41,12 +46,32 @@ /** @name Local data structures */ /*@{*/ -typedef struct dwt_local{ - int * mem ; +typedef struct dwt_local { + int* mem; + int dn; + int sn; + int cas; +} dwt_t; + +typedef union { + float f[4]; +} v4; + +typedef struct v4dwt_local { + v4* wavelet ; int dn ; int sn ; int cas ; - } dwt_t ; +} v4dwt_t ; + +static const float alpha = 1.586134342f; // 12994 +static const float beta = 0.052980118f; // 434 +static const float gamma = -0.882911075f; // -7233 +static const float delta = -0.443506852f; // -3633 + +static const float K = 1.230174105f; // 10078 +/* FIXME: What is this constant? */ +static const float c13318 = 1.625732422f; /*@}*/ @@ -87,17 +112,13 @@ Forward 9-7 wavelet transform in 1-D */ static void dwt_encode_1_real(int *a, int dn, int sn, int cas); /** -Inverse 9-7 wavelet transform in 1-D -*/ -static void dwt_decode_1_real(dwt_t *v); -/** -FIXME : comment ??? +Explicit calculation of the Quantization Stepsizes */ static void dwt_encode_stepsize(int stepsize, int numbps, opj_stepsize_t *bandno_stepsize); /** Inverse wavelet transform in 2-D. */ -static void dwt_decode_tile(opj_tcd_tilecomp_t * tilec, int stop , DWT1DFN fn); +static void dwt_decode_tile(opj_tcd_tilecomp_t* tilec, int i, DWT1DFN fn); /*@}*/ @@ -285,102 +306,6 @@ static void dwt_encode_1_real(int *a, int dn, int sn, int cas) { } } -static void dwt_decode_sm(dwt_t* v, int k, int n, int x) { - int m = k > n ? n : k; - int l = v->mem[1]; //D(0); - int j; - int i; - for (i = 0; i < m; i++) { - j = l; - WS(i) -= fix_mul( ( l = WD(i) ) + j , x); - } - if( i < k ) { - l = fix_mul( l + l , x ); - for (; i < k; i++) - WS(i) -= l; - } -} - -static void dwt_decode_sp(dwt_t* v, int k, int n, int x) { - int m = k > n ? n : k; - int l = v->mem[1]; //D(0); - int j; - int i; - for (i = 0; i < m; i++) { - j = l; - WS(i) += fix_mul( ( l = WD(i) ) + j , x); - } - if( i < k ) { - l = fix_mul( l + l , x ); - for (; i < k; i++) - WS(i) += l; - } -} - -static void dwt_decode_dm(dwt_t* v, int k, int n, int x) { - int m = k >= n ? n-1 : k; - int l = v->mem[0]; //S(0); - int i; - int j; - for (i = 0; i < m; i++) { - j = l; - WD(i) -= fix_mul( ( l = WS(i+1) ) + j , x); - } - if( i < k ) { - l = fix_mul( l + l , x ); - for (; i < k; i++) - WD(i) -= l; - } -} - -static void dwt_decode_dp(dwt_t* v, int k, int n, int x) { - int m = k >= n ? n-1 : k; - int l = v->mem[0]; //S(0); - int i; - int j; - for (i = 0; i < m; i++) { - j = l; - WD(i) += fix_mul( ( l = WS(i+1) ) + j , x); - } - - if( i < k ) { - l = fix_mul( l + l , x ); - for (; i < k; i++) - WD(i) += l; - } -} - - -/* */ -/* Inverse 9-7 wavelet transform in 1-D. */ -/* */ -static void dwt_decode_1_real(dwt_t* v) { - int i; - if (!v->cas) { - if ((v->dn > 0) || (v->sn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < v->sn; i++) - WS(i) = fix_mul(WS(i), 10078); /* 10076 */ - for (i = 0; i < v->dn; i++) - WD(i) = fix_mul(WD(i), 13318); /* 13320 */ - dwt_decode_sm(v, v->sn, v->dn, 3633); - dwt_decode_dm(v, v->dn, v->sn, 7233); - dwt_decode_sp(v, v->sn, v->dn, 434); - dwt_decode_dp(v, v->dn, v->sn, 12994); - } - } else { - if ((v->sn > 0) || (v->dn > 1)) { /* NEW : CASE ONE ELEMENT */ - for (i = 0; i < v->sn; i++) - WD(i) = fix_mul(WD(i), 10078); /* 10076 */ - for (i = 0; i < v->dn; i++) - WS(i) = fix_mul(WS(i), 13318); /* 13320 */ - dwt_decode_dm(v, v->sn, v->dn, 3633); - dwt_decode_sm(v, v->dn, v->sn, 7233); - dwt_decode_dp(v, v->sn, v->dn, 434); - dwt_decode_sp(v, v->dn, v->sn, 12994); - } - } -} - static void dwt_encode_stepsize(int stepsize, int numbps, opj_stepsize_t *bandno_stepsize) { int p, n; p = int_floorlog2(stepsize) - 13; @@ -454,8 +379,8 @@ void dwt_encode(opj_tcd_tilecomp_t * tilec) { /* */ /* Inverse 5-3 wavelet transform in 2-D. */ /* */ -void dwt_decode(opj_tcd_tilecomp_t * tilec, int stop) { - dwt_decode_tile(tilec, stop, &dwt_decode_1); +void dwt_decode(opj_tcd_tilecomp_t* tilec, int numres) { + dwt_decode_tile(tilec, numres, &dwt_decode_1); } @@ -534,14 +459,6 @@ void dwt_encode_real(opj_tcd_tilecomp_t * tilec) { } -/* */ -/* Inverse 9-7 wavelet transform in 2-D. */ -/* */ -void dwt_decode_real(opj_tcd_tilecomp_t * tilec, int stop) { - dwt_decode_tile(tilec, stop, dwt_decode_1_real); -} - - /* */ /* Get gain of 9-7 wavelet transform. */ /* */ @@ -582,7 +499,7 @@ void dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, int prec) { /* */ /* Determine maximum computed resolution level for inverse wavelet transform */ /* */ -static int dwt_decode_max_resolution(opj_tcd_resolution_t* r, int i) { +static int dwt_decode_max_resolution(opj_tcd_resolution_t* restrict r, int i) { int mr = 1; int w; while( --i ) { @@ -599,62 +516,310 @@ static int dwt_decode_max_resolution(opj_tcd_resolution_t* r, int i) { /* */ /* Inverse wavelet transform in 2-D. */ /* */ -static void dwt_decode_tile(opj_tcd_tilecomp_t * tilec, int stop, DWT1DFN dwt_1D) { - opj_tcd_resolution_t* tr; - int i, j, k; - int *a = NULL; - int *aj = NULL; - int w; //, l; - int rw; /* width of the resolution level computed */ - int rh; /* height of the resolution level computed */ +static void dwt_decode_tile(opj_tcd_tilecomp_t* tilec, int numres, DWT1DFN dwt_1D) { dwt_t h; dwt_t v; - - if( 1 > ( i = tilec->numresolutions - stop ) ) - return ; - tr = tilec->resolutions; + opj_tcd_resolution_t* tr = tilec->resolutions; - w = tilec->x1-tilec->x0; - a = tilec->data; + int rw = tr->x1 - tr->x0; /* width of the resolution level computed */ + int rh = tr->y1 - tr->y0; /* height of the resolution level computed */ - h.mem = (int*) opj_aligned_malloc(dwt_decode_max_resolution(tr, i) * sizeof(int)); + int w = tilec->x1 - tilec->x0; + + h.mem = opj_aligned_malloc(dwt_decode_max_resolution(tr, numres) * sizeof(int)); v.mem = h.mem; - rw = tr->x1 - tr->x0; - rh = tr->y1 - tr->y0; + while( --numres) { + int * restrict tiledp = tilec->data; + int j; - while( --i ) { - tr++; + ++tr; h.sn = rw; v.sn = rh; - h.dn = ( rw = tr->x1 - tr->x0 ) - h.sn; - v.dn = ( rh = tr->y1 - tr->y0 ) - v.sn; - - h.cas = tr->x0 % 2; - v.cas = tr->y0 % 2; - aj = a; - j = rh; - while( j-- ) { - dwt_interleave_h(&h, aj); + rw = tr->x1 - tr->x0; + rh = tr->y1 - tr->y0; + + h.dn = rw - h.sn; + h.cas = tr->x0 % 2; + + for(j = 0; j < rh; ++j) { + dwt_interleave_h(&h, &tiledp[j*w]); (dwt_1D)(&h); - k = rw; - while( k-- ) - aj[k] = h.mem[k]; - aj += w; + memcpy(&tiledp[j*w], h.mem, rw * sizeof(int)); } - aj = a; - j = rw; - while( j-- ) { - dwt_interleave_v(&v, aj, w); + v.dn = rh - v.sn; + v.cas = tr->y0 % 2; + + for(j = 0; j < rw; ++j){ + int k; + dwt_interleave_v(&v, &tiledp[j], w); (dwt_1D)(&v); - k = rh; - while( k-- ) - aj[k * w] = v.mem[k]; - aj++; + for(k = 0; k < rh; ++k) { + tiledp[k * w + j] = v.mem[k]; + } } } opj_aligned_free(h.mem); } + +static void v4dwt_interleave_h(v4dwt_t* restrict w, float* restrict a, int x, int size){ + float* restrict bi = (float*) (w->wavelet + w->cas); + int count = w->sn; + int i, k; + for(k = 0; k < 2; ++k){ + for(i = 0; i < count; ++i){ + int j = i; + bi[i*8 ] = a[j]; + j += x; + if(j > size) continue; + bi[i*8 + 1] = a[j]; + j += x; + if(j > size) continue; + bi[i*8 + 2] = a[j]; + j += x; + if(j > size) continue; + bi[i*8 + 3] = a[j]; + } + bi = (float*) (w->wavelet + 1 - w->cas); + a += w->sn; + size -= w->sn; + count = w->dn; + } +} + +static void v4dwt_interleave_v(v4dwt_t* restrict v , float* restrict a , int x){ + v4* restrict bi = v->wavelet + v->cas; + int i; + for(i = 0; i < v->sn; ++i){ + memcpy(&bi[i*2], &a[i*x], 4 * sizeof(float)); + } + a += v->sn * x; + bi = v->wavelet + 1 - v->cas; + for(i = 0; i < v->dn; ++i){ + memcpy(&bi[i*2], &a[i*x], 4 * sizeof(float)); + } +} + +#ifdef __SSE__ + +static void v4dwt_decode_step1_sse(v4* w, int count, const __m128 c){ + __m128* restrict vw = (__m128*) w; + int i; + for(i = 0; i < count; ++i){ + __m128 tmp = vw[i*2]; + vw[i*2] = tmp * c; + } +} + +static void v4dwt_decode_step2_sse(v4* l, v4* w, int k, int m, __m128 c){ + __m128* restrict vl = (__m128*) l; + __m128* restrict vw = (__m128*) w; + int i; + for(i = 0; i < m; ++i){ + __m128 tmp1 = vl[ 0]; + __m128 tmp2 = vw[-1]; + __m128 tmp3 = vw[ 0]; + vw[-1] = tmp2 + ((tmp1 + tmp3) * c); + vl = vw; + vw += 2; + } + if(m >= k){ + return; + } + c += c; + c *= vl[0]; + for(; m < k; ++m){ + __m128 tmp = vw[-1]; + vw[-1] = tmp + c; + vw += 2; + } +} + +#else + +static void v4dwt_decode_step1(v4* w, int count, const float c){ + float* restrict fw = (float*) w; + int i; + for(i = 0; i < count; ++i){ + float tmp1 = fw[i*8 ]; + float tmp2 = fw[i*8 + 1]; + float tmp3 = fw[i*8 + 2]; + float tmp4 = fw[i*8 + 3]; + fw[i*8 ] = tmp1 * c; + fw[i*8 + 1] = tmp2 * c; + fw[i*8 + 2] = tmp3 * c; + fw[i*8 + 3] = tmp4 * c; + } +} + +static void v4dwt_decode_step2(v4* l, v4* w, int k, int m, float c){ + float* restrict fl = (float*) l; + float* restrict fw = (float*) w; + int i; + for(i = 0; i < m; ++i){ + float tmp1_1 = fl[0]; + float tmp1_2 = fl[1]; + float tmp1_3 = fl[2]; + float tmp1_4 = fl[3]; + float tmp2_1 = fw[-4]; + float tmp2_2 = fw[-3]; + float tmp2_3 = fw[-2]; + float tmp2_4 = fw[-1]; + float tmp3_1 = fw[0]; + float tmp3_2 = fw[1]; + float tmp3_3 = fw[2]; + float tmp3_4 = fw[3]; + fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); + fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); + fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); + fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c); + fl = fw; + fw += 8; + } + if(m < k){ + float c1; + float c2; + float c3; + float c4; + c += c; + c1 = fl[0] * c; + c2 = fl[1] * c; + c3 = fl[2] * c; + c4 = fl[3] * c; + for(; m < k; ++m){ + float tmp1 = fw[-4]; + float tmp2 = fw[-3]; + float tmp3 = fw[-2]; + float tmp4 = fw[-1]; + fw[-4] = tmp1 + c1; + fw[-3] = tmp2 + c2; + fw[-2] = tmp3 + c3; + fw[-1] = tmp4 + c4; + fw += 8; + } + } +} + +#endif + +/* */ +/* Inverse 9-7 wavelet transform in 1-D. */ +/* */ +static void v4dwt_decode(v4dwt_t* restrict dwt){ + int a, b; + if(dwt->cas == 0) { + if(!((dwt->dn > 0) || (dwt->sn > 1))){ + return; + } + a = 0; + b = 1; + }else{ + if(!((dwt->sn > 0) || (dwt->dn > 1))) { + return; + } + a = 1; + b = 0; + } +#ifdef __SSE__ + v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(K)); + v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(c13318)); + v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(delta)); + v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(gamma)); + v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(beta)); + v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(alpha)); +#else + v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, K); + v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, c13318); + v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), delta); + v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), gamma); + v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, int_min(dwt->sn, dwt->dn-a), beta); + v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, int_min(dwt->dn, dwt->sn-b), alpha); +#endif +} + +/* */ +/* Inverse 9-7 wavelet transform in 2-D. */ +/* */ +void dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, int numres){ + v4dwt_t h; + v4dwt_t v; + + opj_tcd_resolution_t* res = tilec->resolutions; + + int rw = res->x1 - res->x0; /* width of the resolution level computed */ + int rh = res->y1 - res->y0; /* height of the resolution level computed */ + + int w = tilec->x1 - tilec->x0; + + h.wavelet = (v4*) opj_aligned_malloc((dwt_decode_max_resolution(res, numres)+5) * sizeof(v4)); + v.wavelet = h.wavelet; + + while( --numres) { + float * restrict aj = (float*) tilec->data; + int bufsize = (tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0); + int j; + + h.sn = rw; + v.sn = rh; + + ++res; + + rw = res->x1 - res->x0; /* width of the resolution level computed */ + rh = res->y1 - res->y0; /* height of the resolution level computed */ + + h.dn = rw - h.sn; + h.cas = res->x0 % 2; + + for(j = rh; j > 0; j -= 4){ + v4dwt_interleave_h(&h, aj, w, bufsize); + v4dwt_decode(&h); + if(j >= 4){ + int k; + for(k = rw; --k >= 0;){ + aj[k ] = h.wavelet[k].f[0]; + aj[k+w ] = h.wavelet[k].f[1]; + aj[k+w*2] = h.wavelet[k].f[2]; + aj[k+w*3] = h.wavelet[k].f[3]; + } + }else{ + int k; + for(k = rw; --k >= 0;){ + switch(j) { + case 3: aj[k+w*2] = h.wavelet[k].f[2]; + case 2: aj[k+w ] = h.wavelet[k].f[1]; + case 1: aj[k ] = h.wavelet[k].f[0]; + } + } + } + aj += w*4; + bufsize -= w*4; + } + + v.dn = rh - v.sn; + v.cas = res->y0 % 2; + + aj = (float*) tilec->data; + for(j = rw; j > 0; j -= 4){ + v4dwt_interleave_v(&v, aj, w); + v4dwt_decode(&v); + if(j >= 4){ + int k; + for(k = 0; k < rh; ++k){ + memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(float)); + } + }else{ + int k; + for(k = 0; k < rh; ++k){ + memcpy(&aj[k*w], &v.wavelet[k], j * sizeof(float)); + } + } + aj += 4; + } + } + + opj_aligned_free(h.wavelet); +} + diff --git a/libopenjpeg/dwt.h b/libopenjpeg/dwt.h index 5c95c760..adf73e54 100644 --- a/libopenjpeg/dwt.h +++ b/libopenjpeg/dwt.h @@ -57,9 +57,9 @@ void dwt_encode(opj_tcd_tilecomp_t * tilec); Inverse 5-3 wavelet tranform in 2-D. Apply a reversible inverse DWT transform to a component of an image. @param tilec Tile component information (current tile) -@param stop FIXME Number of decoded resolution levels ? +@param numres Number of resolution levels to decode */ -void dwt_decode(opj_tcd_tilecomp_t * tilec, int stop); +void dwt_decode(opj_tcd_tilecomp_t* tilec, int numres); /** Get the gain of a subband for the reversible 5-3 DWT. @param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) @@ -83,9 +83,9 @@ void dwt_encode_real(opj_tcd_tilecomp_t * tilec); Inverse 9-7 wavelet transform in 2-D. Apply an irreversible inverse DWT transform to a component of an image. @param tilec Tile component information (current tile) -@param stop FIXME Number of decoded resolution levels ? +@param numres Number of resolution levels to decode */ -void dwt_decode_real(opj_tcd_tilecomp_t * tilec, int stop); +void dwt_decode_real(opj_tcd_tilecomp_t* tilec, int numres); /** Get the gain of a subband for the irreversible 9-7 DWT. @param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) @@ -100,9 +100,9 @@ Get the norm of a wavelet function of a subband at a specified level for the irr */ double dwt_getnorm_real(int level, int orient); /** -FIXME : comment ??? -@param tccp -@param prec +Explicit calculation of the Quantization Stepsizes +@param tccp Tile-component coding parameters +@param prec Precint analyzed */ void dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, int prec); /* ----------------------------------------------------------------------- */ diff --git a/libopenjpeg/mct.c b/libopenjpeg/mct.c index fad93552..ca21744f 100644 --- a/libopenjpeg/mct.c +++ b/libopenjpeg/mct.c @@ -44,16 +44,20 @@ static const double mct_norms_real[3] = { 1.732, 1.805, 1.573 }; /* */ /* Foward reversible MCT. */ /* */ -void mct_encode(int *c0, int *c1, int *c2, int n) { +void mct_encode( + int* restrict c0, + int* restrict c1, + int* restrict c2, + int n) +{ int i; - for (i = 0; i < n; i++) { - int r, g, b, y, u, v; - r = c0[i]; - g = c1[i]; - b = c2[i]; - y = (r + (g << 1) + b) >> 2; - u = b - g; - v = r - g; + for(i = 0; i < n; ++i) { + int r = c0[i]; + int g = c1[i]; + int b = c2[i]; + int y = (r + (g * 2) + b) >> 2; + int u = b - g; + int v = r - g; c0[i] = y; c1[i] = u; c2[i] = v; @@ -63,16 +67,20 @@ void mct_encode(int *c0, int *c1, int *c2, int n) { /* */ /* Inverse reversible MCT. */ /* */ -void mct_decode(int *c0, int *c1, int *c2, int n) { +void mct_decode( + int* restrict c0, + int* restrict c1, + int* restrict c2, + int n) +{ int i; - for (i = 0; i < n; i++) { - int y, u, v, r, g, b; - y = c0[i]; - u = c1[i]; - v = c2[i]; - g = y - ((u + v) >> 2); - r = v + g; - b = u + g; + for (i = 0; i < n; ++i) { + int y = c0[i]; + int u = c1[i]; + int v = c2[i]; + int g = y - ((u + v) >> 2); + int r = v + g; + int b = u + g; c0[i] = r; c1[i] = g; c2[i] = b; @@ -89,16 +97,20 @@ double mct_getnorm(int compno) { /* */ /* Foward irreversible MCT. */ /* */ -void mct_encode_real(int *c0, int *c1, int *c2, int n) { +void mct_encode_real( + int* restrict c0, + int* restrict c1, + int* restrict c2, + int n) +{ int i; - for (i = 0; i < n; i++) { - int r, g, b, y, u, v; - r = c0[i]; - g = c1[i]; - b = c2[i]; - y = fix_mul(r, 2449) + fix_mul(g, 4809) + fix_mul(b, 934); - u = -fix_mul(r, 1382) - fix_mul(g, 2714) + fix_mul(b, 4096); - v = fix_mul(r, 4096) - fix_mul(g, 3430) - fix_mul(b, 666); + for(i = 0; i < n; ++i) { + int r = c0[i]; + int g = c1[i]; + int b = c2[i]; + int y = fix_mul(r, 2449) + fix_mul(g, 4809) + fix_mul(b, 934); + int u = -fix_mul(r, 1382) - fix_mul(g, 2714) + fix_mul(b, 4096); + int v = fix_mul(r, 4096) - fix_mul(g, 3430) - fix_mul(b, 666); c0[i] = y; c1[i] = u; c2[i] = v; @@ -108,16 +120,20 @@ void mct_encode_real(int *c0, int *c1, int *c2, int n) { /* */ /* Inverse irreversible MCT. */ /* */ -void mct_decode_real(int *c0, int *c1, int *c2, int n) { +void mct_decode_real( + float* restrict c0, + float* restrict c1, + float* restrict c2, + int n) +{ int i; - for (i = 0; i < n; i++) { - int y, u, v, r, g, b; - y = c0[i]; - u = c1[i]; - v = c2[i]; - r = y + fix_mul(v, 11485); - g = y - fix_mul(u, 2819) - fix_mul(v, 5850); - b = y + fix_mul(u, 14516); + for(i = 0; i < n; ++i) { + float y = c0[i]; + float u = c1[i]; + float v = c2[i]; + float r = y + (v * 1.402f); + float g = y - (u * 0.34413f) - (v * (0.71414f)); + float b = y + (u * 1.772f); c0[i] = r; c1[i] = g; c2[i] = b; diff --git a/libopenjpeg/mct.h b/libopenjpeg/mct.h index 7596187d..84e3f8ad 100644 --- a/libopenjpeg/mct.h +++ b/libopenjpeg/mct.h @@ -83,7 +83,7 @@ Apply an irreversible multi-component inverse transform to an image @param c2 Samples for blue chrominance component @param n Number of samples for each component */ -void mct_decode_real(int *c0, int *c1, int *c2, int n); +void mct_decode_real(float* c0, float* c1, float* c2, int n); /** Get norm of the basis function used for the irreversible multi-component transform @param compno Number of the component (0->Y, 1->U, 2->V) diff --git a/libopenjpeg/opj_includes.h b/libopenjpeg/opj_includes.h index 7599a71f..80d43df9 100644 --- a/libopenjpeg/opj_includes.h +++ b/libopenjpeg/opj_includes.h @@ -76,6 +76,30 @@ Most compilers implement their own version of this keyword ... #endif /* defined() */ #endif /* INLINE */ +/* Are restricted pointers available? (C99) */ +#if (__STDC_VERSION__ != 199901L) + /* Not a C99 compiler */ + #ifdef __GNUC__ + #define restrict __restrict__ + #else + #define restrict /* restrict */ + #endif +#endif + +/* MSVC does not have lrintf */ +#ifdef _MSC_VER +static INLINE long lrintf(float f){ + int i; + + _asm{ + fld f + fistp i + }; + + return i; +} +#endif + #include "j2k_lib.h" #include "opj_malloc.h" #include "event.h" diff --git a/libopenjpeg/t1.c b/libopenjpeg/t1.c index de32e55b..83b10efd 100644 --- a/libopenjpeg/t1.c +++ b/libopenjpeg/t1.c @@ -802,24 +802,22 @@ static void t1_encode_cblk( int numcomps, opj_tcd_tile_t * tile) { - int i, j; - int passno; - int bpno, passtype; - int max; - int nmsedec = 0; double cumwmsedec = 0.0; + + opj_mqc_t *mqc = t1->mqc; /* MQC component */ + + int passno, bpno, passtype; + int nmsedec = 0; + int i, max; char type = T1_TYPE_MQ; double tempwmsedec; - - opj_mqc_t *mqc = t1->mqc; /* MQC component */ - + max = 0; - for (j = 0; j < t1->h; ++j) { - for (i = 0; i < t1->w; ++i) { - max = int_max(max, int_abs(t1->data[(j * t1->w) + i])); - } + for (i = 0; i < t1->w * t1->h; ++i) { + int tmp = abs(t1->data[i]); + max = int_max(max, tmp); } - + cblk->numbps = max ? (int_floorlog2(max) + 1) - T1_NMSEDEC_FRACBITS : 0; bpno = cblk->numbps - 1; @@ -932,12 +930,12 @@ static void t1_decode_cblk( int roishift, int cblksty) { + opj_raw_t *raw = t1->raw; /* RAW component */ + opj_mqc_t *mqc = t1->mqc; /* MQC component */ + int bpno, passtype; int segno, passno; char type = T1_TYPE_MQ; /* BYPASS mode */ - - opj_raw_t *raw = t1->raw; /* RAW component */ - opj_mqc_t *mqc = t1->mqc; /* MQC component */ if(!allocate_buffers( t1, @@ -1034,23 +1032,29 @@ void t1_encode_cblks( tile->distotile = 0; /* fixed_quality */ for (compno = 0; compno < tile->numcomps; ++compno) { - opj_tcd_tilecomp_t *tilec = &tile->comps[compno]; + opj_tcd_tilecomp_t* tilec = &tile->comps[compno]; + opj_tccp_t* tccp = &tcp->tccps[compno]; + int tile_w = tilec->x1 - tilec->x0; for (resno = 0; resno < tilec->numresolutions; ++resno) { opj_tcd_resolution_t *res = &tilec->resolutions[resno]; for (bandno = 0; bandno < res->numbands; ++bandno) { - opj_tcd_band_t *band = &res->bands[bandno]; + opj_tcd_band_t* restrict band = &res->bands[bandno]; for (precno = 0; precno < res->pw * res->ph; ++precno) { opj_tcd_precinct_t *prc = &band->precincts[precno]; for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { - int x, y, w, i, j; opj_tcd_cblk_t *cblk = &prc->cblks[cblkno]; + int* restrict datap; + int* restrict tiledp; + int cblk_w; + int cblk_h; + int i, j; - x = cblk->x0 - band->x0; - y = cblk->y0 - band->y0; + int x = cblk->x0 - band->x0; + int y = cblk->y0 - band->y0; if (band->bandno & 1) { opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; x += pres->x1 - pres->x0; @@ -1068,20 +1072,25 @@ void t1_encode_cblks( return; } - w = tilec->x1 - tilec->x0; - if (tcp->tccps[compno].qmfbid == 1) { - for (j = 0; j < t1->h; ++j) { - for (i = 0; i < t1->w; ++i) { - t1->data[(j * t1->w) + i] = - tilec->data[(x + i) + (y + j) * w] << T1_NMSEDEC_FRACBITS; + datap=t1->data; + cblk_w = t1->w; + cblk_h = t1->h; + + tiledp=&tilec->data[(y * tile_w) + x]; + if (tccp->qmfbid == 1) { + for (j = 0; j < cblk_h; ++j) { + for (i = 0; i < cblk_w; ++i) { + int tmp = tiledp[(j * tile_w) + i]; + datap[(j * cblk_w) + i] = tmp << T1_NMSEDEC_FRACBITS; } } - } else { /* if (tcp->tccps[compno].qmfbid == 0) */ - for (j = 0; j < t1->h; ++j) { - for (i = 0; i < t1->w; ++i) { - t1->data[(j * t1->w) + i] = + } else { /* if (tccp->qmfbid == 0) */ + for (j = 0; j < cblk_h; ++j) { + for (i = 0; i < cblk_w; ++i) { + int tmp = tiledp[(j * tile_w) + i]; + datap[(j * cblk_w) + i] = fix_mul( - tilec->data[x + i + (y + j) * w], + tmp, 8192 * 8192 / ((int) floor(band->stepsize * 8192))) >> (11 - T1_NMSEDEC_FRACBITS); } } @@ -1093,9 +1102,9 @@ void t1_encode_cblks( band->bandno, compno, tilec->numresolutions - 1 - resno, - tcp->tccps[compno].qmfbid, + tccp->qmfbid, band->stepsize, - tcp->tccps[compno].cblksty, + tccp->cblksty, tile->numcomps, tile); @@ -1107,87 +1116,86 @@ void t1_encode_cblks( } void t1_decode_cblks( - opj_t1_t *t1, - opj_tcd_tile_t *tile, - opj_tcp_t *tcp) + opj_t1_t* t1, + opj_tcd_tilecomp_t* tilec, + opj_tccp_t* tccp) { - int compno, resno, bandno, precno, cblkno; + int resno, bandno, precno, cblkno; - for (compno = 0; compno < tile->numcomps; ++compno) { - opj_tcd_tilecomp_t *tilec = &tile->comps[compno]; + int tile_w = tilec->x1 - tilec->x0; - for (resno = 0; resno < tilec->numresolutions; ++resno) { - opj_tcd_resolution_t *res = &tilec->resolutions[resno]; + for (resno = 0; resno < tilec->numresolutions; ++resno) { + opj_tcd_resolution_t* res = &tilec->resolutions[resno]; - for (bandno = 0; bandno < res->numbands; ++bandno) { - opj_tcd_band_t *band = &res->bands[bandno]; + for (bandno = 0; bandno < res->numbands; ++bandno) { + opj_tcd_band_t* restrict band = &res->bands[bandno]; - for (precno = 0; precno < res->pw * res->ph; ++precno) { - opj_tcd_precinct_t *prc = &band->precincts[precno]; + for (precno = 0; precno < res->pw * res->ph; ++precno) { + opj_tcd_precinct_t* prc = &band->precincts[precno]; - for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { - int x, y, w, i, j, cblk_w, cblk_h; - opj_tcd_cblk_t *cblk = &prc->cblks[cblkno]; + for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { + opj_tcd_cblk_t* cblk = &prc->cblks[cblkno]; + int* restrict datap; + void* restrict tiledp; + int cblk_w, cblk_h; + int x, y; + int i, j; - t1_decode_cblk( - t1, - cblk, - band->bandno, - tcp->tccps[compno].roishift, - tcp->tccps[compno].cblksty); + t1_decode_cblk( + t1, + cblk, + band->bandno, + tccp->roishift, + tccp->cblksty); - x = cblk->x0 - band->x0; - y = cblk->y0 - band->y0; - if (band->bandno & 1) { - opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; - x += pres->x1 - pres->x0; - } - if (band->bandno & 2) { - opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; - y += pres->y1 - pres->y0; - } + x = cblk->x0 - band->x0; + y = cblk->y0 - band->y0; + if (band->bandno & 1) { + opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; + x += pres->x1 - pres->x0; + } + if (band->bandno & 2) { + opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; + y += pres->y1 - pres->y0; + } - cblk_w = cblk->x1 - cblk->x0; - cblk_h = cblk->y1 - cblk->y0; + datap=t1->data; + cblk_w = t1->w; + cblk_h = t1->h; - if (tcp->tccps[compno].roishift) { - int thresh = 1 << tcp->tccps[compno].roishift; - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - int val = t1->data[(j * t1->w) + i]; - int mag = int_abs(val); - if (mag >= thresh) { - mag >>= tcp->tccps[compno].roishift; - t1->data[(j * t1->w) + i] = val < 0 ? -mag : mag; - } + if (tccp->roishift) { + int thresh = 1 << tccp->roishift; + for (j = 0; j < cblk_h; ++j) { + for (i = 0; i < cblk_w; ++i) { + int val = datap[(j * cblk_w) + i]; + int mag = abs(val); + if (mag >= thresh) { + mag >>= tccp->roishift; + datap[(j * cblk_w) + i] = val < 0 ? -mag : mag; } } } - - w = tilec->x1 - tilec->x0; - if (tcp->tccps[compno].qmfbid == 1) { - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - tilec->data[x + i + (y + j) * w] = t1->data[(j * t1->w) + i]/2; - } - } - } else { /* if (tcp->tccps[compno].qmfbid == 0) */ - for (j = 0; j < cblk_h; ++j) { - for (i = 0; i < cblk_w; ++i) { - if (t1->data[(j * t1->w) + i] >> 1 == 0) { - tilec->data[x + i + (y + j) * w] = 0; - } else { - double tmp = (double)(t1->data[(j * t1->w) + i] * band->stepsize * 4096.0); - int tmp2 = ((int) (floor(fabs(tmp)))) + ((int) floor(fabs(tmp*2))%2); - tilec->data[x + i + (y + j) * w] = ((tmp<0)?-tmp2:tmp2); - } - } + } + + tiledp=(void*)&tilec->data[(y * tile_w) + x]; + if (tccp->qmfbid == 1) { + for (j = 0; j < cblk_h; ++j) { + for (i = 0; i < cblk_w; ++i) { + int tmp = datap[(j * cblk_w) + i]; + ((int*)tiledp)[(j * tile_w) + i] = tmp / 2; } } - } /* cblkno */ - } /* precno */ - } /* bandno */ - } /* resno */ - } /* compno */ + } else { /* if (tccp->qmfbid == 0) */ + for (j = 0; j < cblk_h; ++j) { + for (i = 0; i < cblk_w; ++i) { + float tmp = datap[(j * cblk_w) + i] * band->stepsize; + ((float*)tiledp)[(j * tile_w) + i] = tmp; + } + } + } + } /* cblkno */ + } /* precno */ + } /* bandno */ + } /* resno */ } diff --git a/libopenjpeg/t1.h b/libopenjpeg/t1.h index 1b6cdb7c..0b4294e1 100644 --- a/libopenjpeg/t1.h +++ b/libopenjpeg/t1.h @@ -138,7 +138,7 @@ Decode the code-blocks of a tile @param tile The tile to decode @param tcp Tile coding parameters */ -void t1_decode_cblks(opj_t1_t *t1, opj_tcd_tile_t *tile, opj_tcp_t *tcp); +void t1_decode_cblks(opj_t1_t* t1, opj_tcd_tilecomp_t* tilec, opj_tccp_t* tccp); /* ----------------------------------------------------------------------- */ /*@}*/ diff --git a/libopenjpeg/tcd.c b/libopenjpeg/tcd.c index 2a8a6223..533fd246 100644 --- a/libopenjpeg/tcd.c +++ b/libopenjpeg/tcd.c @@ -670,8 +670,9 @@ void tcd_malloc_decode_tile(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp, tilec->y0 = int_ceildiv(tile->y0, image->comps[compno].dy); tilec->x1 = int_ceildiv(tile->x1, image->comps[compno].dx); tilec->y1 = int_ceildiv(tile->y1, image->comps[compno].dy); - - tilec->data = (int*) opj_aligned_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int)); + + /* The +3 is headroom required by the vectorized DWT */ + tilec->data = (int*) opj_aligned_malloc((((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0))+3) * sizeof(int)); tilec->numresolutions = tccp->numresolutions; tilec->resolutions = (opj_tcd_resolution_t *) opj_malloc(tilec->numresolutions * sizeof(opj_tcd_resolution_t)); @@ -756,7 +757,7 @@ void tcd_malloc_decode_tile(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp, ss = &tccp->stepsizes[resno == 0 ? 0 : 3 * (resno - 1) + bandno + 1]; gain = tccp->qmfbid == 0 ? dwt_getgain_real(band->bandno) : dwt_getgain(band->bandno); numbps = image->comps[compno].prec + gain; - band->stepsize = (float)((1.0 + ss->mant / 2048.0) * pow(2.0, numbps - ss->expn)); + band->stepsize = (float)(((1.0 + ss->mant / 2048.0) * pow(2.0, numbps - ss->expn)) * 0.5); band->numbps = ss->expn + tccp->numgbits - 1; /* WHY -1 ? */ band->precincts = (opj_tcd_precinct_t *) opj_malloc(res->pw * res->ph * sizeof(opj_tcd_precinct_t)); @@ -1350,7 +1351,9 @@ bool tcd_decode_tile(opj_tcd_t *tcd, unsigned char *src, int len, int tileno, op t1_time = opj_clock(); /* time needed to decode a tile */ t1 = t1_create(tcd->cinfo); - t1_decode_cblks(t1, tile, tcd->tcp); + for (compno = 0; compno < tile->numcomps; ++compno) { + t1_decode_cblks(t1, &tile->comps[compno], &tcd->tcp->tccps[compno]); + } t1_destroy(t1); t1_time = opj_clock() - t1_time; opj_event_msg(tcd->cinfo, EVT_INFO, "- tiers-1 took %f s\n", t1_time); @@ -1360,6 +1363,8 @@ bool tcd_decode_tile(opj_tcd_t *tcd, unsigned char *src, int len, int tileno, op dwt_time = opj_clock(); /* time needed to decode a tile */ for (compno = 0; compno < tile->numcomps; compno++) { opj_tcd_tilecomp_t *tilec = &tile->comps[compno]; + int numres2decode; + if (tcd->cp->reduce != 0) { tcd->image->comps[compno].resno_decoded = tile->comps[compno].numresolutions - tcd->cp->reduce - 1; @@ -1369,66 +1374,75 @@ bool tcd_decode_tile(opj_tcd_t *tcd, unsigned char *src, int len, int tileno, op return false; } } - - if (tcd->tcp->tccps[compno].qmfbid == 1) { - dwt_decode(tilec, tilec->numresolutions - 1 - tcd->image->comps[compno].resno_decoded); - } else { - dwt_decode_real(tilec, tilec->numresolutions - 1 - tcd->image->comps[compno].resno_decoded); - } - } - dwt_time = opj_clock() - dwt_time; - opj_event_msg(tcd->cinfo, EVT_INFO, "- dwt took %f s\n", dwt_time); - - /*----------------MCT-------------------*/ - - if (tcd->tcp->mct) { - if (tcd->tcp->tccps[0].qmfbid == 1) { - mct_decode(tile->comps[0].data, tile->comps[1].data, tile->comps[2].data, - (tile->comps[0].x1 - tile->comps[0].x0) * (tile->comps[0].y1 - tile->comps[0].y0)); - } else { - mct_decode_real(tile->comps[0].data, tile->comps[1].data, tile->comps[2].data, - (tile->comps[0].x1 - tile->comps[0].x0) * (tile->comps[0].y1 - tile->comps[0].y0)); - } - } - - - /*---------------TILE-------------------*/ - - for (compno = 0; compno < tile->numcomps; compno++) { - opj_tcd_tilecomp_t *tilec = &tile->comps[compno]; - opj_tcd_resolution_t *res = &tilec->resolutions[tcd->image->comps[compno].resno_decoded]; - int adjust = tcd->image->comps[compno].sgnd ? 0 : 1 << (tcd->image->comps[compno].prec - 1); - int min = tcd->image->comps[compno].sgnd ? - -(1 << (tcd->image->comps[compno].prec - 1)) : 0; - int max = tcd->image->comps[compno].sgnd ? - (1 << (tcd->image->comps[compno].prec - 1)) - 1 : (1 << tcd->image->comps[compno].prec) - 1; - - int tw = tilec->x1 - tilec->x0; - int w = tcd->image->comps[compno].w; - - int i, j; - int offset_x = int_ceildivpow2(tcd->image->comps[compno].x0, tcd->image->comps[compno].factor); - int offset_y = int_ceildivpow2(tcd->image->comps[compno].y0, tcd->image->comps[compno].factor); - - for (j = res->y0; j < res->y1; j++) { - for (i = res->x0; i < res->x1; i++) { - int v; - float tmp = (float)((tilec->data[i - res->x0 + (j - res->y0) * tw]) / 8192.0); - - if (tcd->tcp->tccps[compno].qmfbid == 1) { - v = tilec->data[i - res->x0 + (j - res->y0) * tw]; - } else { - int tmp2 = ((int) (floor(fabs(tmp)))) + ((int) floor(fabs(tmp*2))%2); - v = ((tmp < 0) ? -tmp2:tmp2); - } - v += adjust; - - tcd->image->comps[compno].data[(i - offset_x) + (j - offset_y) * w] = int_clamp(v, min, max); + numres2decode = tcd->image->comps[compno].resno_decoded + 1; + if(numres2decode > 0){ + if (tcd->tcp->tccps[compno].qmfbid == 1) { + dwt_decode(tilec, numres2decode); + } else { + dwt_decode_real(tilec, numres2decode); } } } - + dwt_time = opj_clock() - dwt_time; + opj_event_msg(tcd->cinfo, EVT_INFO, "- dwt took %f s\n", dwt_time); + + /*----------------MCT-------------------*/ + + if (tcd->tcp->mct) { + int n = (tile->comps[0].x1 - tile->comps[0].x0) * (tile->comps[0].y1 - tile->comps[0].y0); + if (tcd->tcp->tccps[0].qmfbid == 1) { + mct_decode( + tile->comps[0].data, + tile->comps[1].data, + tile->comps[2].data, + n); + } else { + mct_decode_real( + (float*)tile->comps[0].data, + (float*)tile->comps[1].data, + (float*)tile->comps[2].data, + n); + } + } + + /*---------------TILE-------------------*/ + + for (compno = 0; compno < tile->numcomps; ++compno) { + opj_tcd_tilecomp_t* tilec = &tile->comps[compno]; + opj_image_comp_t* imagec = &tcd->image->comps[compno]; + opj_tcd_resolution_t* res = &tilec->resolutions[imagec->resno_decoded]; + int adjust = imagec->sgnd ? 0 : 1 << (imagec->prec - 1); + int min = imagec->sgnd ? -(1 << (imagec->prec - 1)) : 0; + int max = imagec->sgnd ? (1 << (imagec->prec - 1)) - 1 : (1 << imagec->prec) - 1; + + int tw = tilec->x1 - tilec->x0; + int w = imagec->w; + + int offset_x = int_ceildivpow2(imagec->x0, imagec->factor); + int offset_y = int_ceildivpow2(imagec->y0, imagec->factor); + + int i, j; + if(tcd->tcp->tccps[compno].qmfbid == 1) { + for(j = res->y0; j < res->y1; ++j) { + for(i = res->x0; i < res->x1; ++i) { + int v = tilec->data[i - res->x0 + (j - res->y0) * tw]; + v += adjust; + imagec->data[(i - offset_x) + (j - offset_y) * w] = int_clamp(v, min, max); + } + } + }else{ + for(j = res->y0; j < res->y1; ++j) { + for(i = res->x0; i < res->x1; ++i) { + float tmp = ((float*)tilec->data)[i - res->x0 + (j - res->y0) * tw]; + int v = lrintf(tmp); + v += adjust; + imagec->data[(i - offset_x) + (j - offset_y) * w] = int_clamp(v, min, max); + } + } + } + } + tile_time = opj_clock() - tile_time; /* time needed to decode a tile */ opj_event_msg(tcd->cinfo, EVT_INFO, "- tile decoded in %f s\n", tile_time); @@ -1475,8 +1489,3 @@ void tcd_free_decode_tile(opj_tcd_t *tcd, int tileno) { if (tile->comps != NULL) opj_free(tile->comps); } - - - - -