diff --git a/CMakeLists.txt b/CMakeLists.txt index 7898d15c..2ef847fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -225,6 +225,16 @@ CHECK_INCLUDE_FILE("unistd.h" HAVE_UNISTD_H) include(TestLargeFiles) OPJ_TEST_LARGE_FILES(OPJ_HAVE_LARGEFILES) +# Allocating Aligned Memory Blocks +include(CheckIncludeFiles) +check_include_files(malloc.h HAVE_MALLOC_H) +include(CheckSymbolExists) +# _aligned_alloc https://msdn.microsoft.com/en-us/library/8z34s9c6.aspx +check_symbol_exists(_aligned_malloc malloc.h HAVE__ALIGNED_MALLOC) +# posix_memalign +check_symbol_exists(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) +# memalign (obsolete) +check_symbol_exists(memalign malloc.h HAVE_MEMALIGN) #----------------------------------------------------------------------------- # Build Library if(BUILD_JPIP_SERVER) diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt index 3129bf53..500e905c 100644 --- a/src/lib/openjp2/CMakeLists.txt +++ b/src/lib/openjp2/CMakeLists.txt @@ -50,6 +50,7 @@ set(OPENJPEG_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/opj_codec.h ${CMAKE_CURRENT_SOURCE_DIR}/opj_includes.h ${CMAKE_CURRENT_SOURCE_DIR}/opj_intmath.h + ${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.c ${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.h ${CMAKE_CURRENT_SOURCE_DIR}/opj_stdint.h ) diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index eead5865..92752f42 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -567,9 +567,11 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); - - h.mem = (OPJ_INT32*) - opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32)); + + if (numres == 1U) { + return OPJ_TRUE; + } + h.mem = (OPJ_INT32*)opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32)); if (! h.mem){ /* FIXME event manager error callback */ return OPJ_FALSE; diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index c8d8cf1a..a8dc22b9 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -9947,6 +9947,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, OPJ_UINT32 l_nb_tiles; OPJ_UINT32 l_max_tile_size = 0, l_current_tile_size; OPJ_BYTE * l_current_data = 00; + OPJ_BOOL l_reuse_data = OPJ_FALSE; opj_tcd_t* p_tcd = 00; /* preconditions */ @@ -9957,6 +9958,17 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, p_tcd = p_j2k->m_tcd; l_nb_tiles = p_j2k->m_cp.th * p_j2k->m_cp.tw; + if (l_nb_tiles == 1) { + l_reuse_data = OPJ_TRUE; +#ifdef __SSE__ + for (j=0;jm_tcd->image->numcomps;++j) { + opj_image_comp_t * l_img_comp = p_tcd->image->comps + j; + if (((size_t)l_img_comp->data & 0xFU) != 0U) { /* tile data shall be aligned on 16 bytes */ + l_reuse_data = OPJ_FALSE; + } + } +#endif + } for (i=0;im_tcd->image->numcomps;++j) { opj_tcd_tilecomp_t* l_tilec = p_tcd->tcd_image->tiles->comps + j; - if (l_nb_tiles == 1) { + if (l_reuse_data) { opj_image_comp_t * l_img_comp = p_tcd->image->comps + j; l_tilec->data = l_img_comp->data; l_tilec->ownsData = OPJ_FALSE; @@ -9984,7 +9996,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, } } l_current_tile_size = opj_tcd_get_encoded_tile_size(p_j2k->m_tcd); - if (l_nb_tiles > 1) { + if (!l_reuse_data) { if (l_current_tile_size > l_max_tile_size) { OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, l_current_tile_size); if (! l_new_current_data) { diff --git a/src/lib/openjp2/mct.c b/src/lib/openjp2/mct.c index 8b0276f3..02259679 100644 --- a/src/lib/openjp2/mct.c +++ b/src/lib/openjp2/mct.c @@ -81,6 +81,10 @@ void opj_mct_encode( { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; + /* buffer are aligned on 16 bytes */ + assert( ((size_t)c0 & 0xf) == 0 ); + assert( ((size_t)c1 & 0xf) == 0 ); + assert( ((size_t)c2 & 0xf) == 0 ); for(i = 0; i < (len & ~3U); i += 4) { __m128i y, u, v; diff --git a/src/lib/openjp2/opj_config_private.h.cmake.in b/src/lib/openjp2/opj_config_private.h.cmake.in index 8a02c79d..97c9fdc5 100644 --- a/src/lib/openjp2/opj_config_private.h.cmake.in +++ b/src/lib/openjp2/opj_config_private.h.cmake.in @@ -17,6 +17,17 @@ #cmakedefine _FILE_OFFSET_BITS @_FILE_OFFSET_BITS@ #cmakedefine OPJ_HAVE_FSEEKO @OPJ_HAVE_FSEEKO@ +/* find whether or not have */ +#cmakedefine HAVE_MALLOC_H +/* check if function `aligned_alloc` exists */ +#cmakedefine HAVE_ALIGNED_ALLOC +/* check if function `_aligned_malloc` exists */ +#cmakedefine HAVE__ALIGNED_MALLOC +/* check if function `memalign` exists */ +#cmakedefine HAVE_MEMALIGN +/* check if function `posix_memalign` exists */ +#cmakedefine HAVE_POSIX_MEMALIGN + /* Byte order. */ /* All compilers that support Mac OS X define either __BIG_ENDIAN__ or __LITTLE_ENDIAN__ to match the endianness of the architecture being @@ -28,4 +39,4 @@ On other platforms we use the result of the TRY_RUN. */ #cmakedefine OPJ_BIG_ENDIAN #elif defined(__BIG_ENDIAN__) # define OPJ_BIG_ENDIAN -#endif \ No newline at end of file +#endif diff --git a/src/lib/openjp2/opj_malloc.c b/src/lib/openjp2/opj_malloc.c new file mode 100644 index 00000000..3bbf80d8 --- /dev/null +++ b/src/lib/openjp2/opj_malloc.c @@ -0,0 +1,149 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2015, Mathieu Malaterre + * Copyright (c) 2015, Matthieu Darbois + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#define OPJ_SKIP_POISON +#include "opj_includes.h" + +static INLINE void *opj_aligned_alloc_n(size_t alignment, size_t size) +{ + void* ptr; + + /* alignment shall be power of 2 */ + assert( (alignment != 0U) && ((alignment & (alignment - 1U)) == 0U)); + + if (size == 0U) { /* prevent implementation defined behavior of realloc */ + return NULL; + } + +#if defined(HAVE_POSIX_MEMALIGN) + /* aligned_alloc requires c11, restrict to posix_memalign for now. Quote: + * This function was introduced in POSIX 1003.1d. Although this function is + * superseded by aligned_alloc, it is more portable to older POSIX systems + * that do not support ISO C11. */ + if (posix_memalign (&ptr, alignment, size)) + { + ptr = NULL; + } + /* older linux */ +#elif defined(HAVE_MEMALIGN) + ptr = memalign( alignment, size ); +/* _MSC_VER */ +#elif defined(HAVE__ALIGNED_MALLOC) + ptr = _aligned_malloc(size, alignment); +#else +/* TODO: _mm_malloc(x,y) */ +#error missing aligned alloc function +#endif + return ptr; +} +static INLINE void *opj_aligned_realloc_n(void *ptr, size_t alignment, size_t new_size) +{ + void *r_ptr; + + /* alignment shall be power of 2 */ + assert( (alignment != 0U) && ((alignment & (alignment - 1U)) == 0U)); + + if (new_size == 0U) { /* prevent implementation defined behavior of realloc */ + return NULL; + } + +/* no portable aligned realloc */ +#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) + /* glibc doc states one can mixed aligned malloc with realloc */ + r_ptr = realloc( ptr, new_size ); /* fast path */ + /* we simply use `size_t` to cast, since we are only interest in binary AND + * operator */ + if( ((size_t)r_ptr & (alignment - 1U)) != 0U ) { + /* this is non-trivial to implement a portable aligned realloc, so use a + * simple approach where we do not need a function that return the size of an + * allocated array (eg. _msize on Windows, malloc_size on MacOS, + * malloc_usable_size on systems with glibc) */ + void *a_ptr = opj_aligned_alloc_n(alignment, new_size); + if (a_ptr != NULL) { + memcpy(a_ptr, r_ptr, new_size); + } + free( r_ptr ); + r_ptr = a_ptr; + } +/* _MSC_VER */ +#elif defined(HAVE__ALIGNED_MALLOC) + r_ptr = _aligned_realloc( ptr, new_size, alignment ); +#else +/* TODO: _mm_malloc(x,y) */ +#error missing aligned realloc function +#endif + return r_ptr; +} +void * opj_malloc(size_t size) +{ + if (size == 0U) { /* prevent implementation defined behavior of realloc */ + return NULL; + } + return malloc(size); +} +void * opj_calloc(size_t num, size_t size) +{ + if (size == 0U) { /* prevent implementation defined behavior of realloc */ + return NULL; + } + /* according to C89 standard, num == 0 shall return a valid pointer */ + return calloc(num, size); +} + +void *opj_aligned_malloc(size_t size) +{ + return opj_aligned_alloc_n(16U, size); +} +void * opj_aligned_realloc(void *ptr, size_t size) +{ + return opj_aligned_realloc_n(ptr, 16U, size); +} + +void opj_aligned_free(void* ptr) +{ +#ifdef HAVE__ALIGNED_MALLOC + _aligned_free( ptr ); +#else + free( ptr ); +#endif +} + +void * opj_realloc(void *ptr, size_t new_size) +{ + if (new_size == 0U) { /* prevent implementation defined behavior of realloc */ + return NULL; + } + return realloc(ptr, new_size); +} +void opj_free(void *ptr) +{ + free(ptr); +} diff --git a/src/lib/openjp2/opj_malloc.h b/src/lib/openjp2/opj_malloc.h index 517707f9..1b3fced9 100644 --- a/src/lib/openjp2/opj_malloc.h +++ b/src/lib/openjp2/opj_malloc.h @@ -31,6 +31,8 @@ */ #ifndef __OPJ_MALLOC_H #define __OPJ_MALLOC_H + +#include /** @file opj_malloc.h @brief Internal functions @@ -50,16 +52,7 @@ Allocate an uninitialized memory block @param size Bytes to allocate @return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available */ -#ifdef ALLOC_PERF_OPT -void * OPJ_CALLCONV opj_malloc(size_t size); -#else -/* prevent assertion on overflow for MSVC */ -#ifdef _MSC_VER -#define opj_malloc(size) ((size_t)(size) >= (size_t)-0x100 ? NULL : malloc(size)) -#else -#define opj_malloc(size) malloc(size) -#endif -#endif +void * opj_malloc(size_t size); /** Allocate a memory block with elements initialized to 0 @@ -67,83 +60,16 @@ Allocate a memory block with elements initialized to 0 @param size Bytes per block to allocate @return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available */ -#ifdef ALLOC_PERF_OPT -void * OPJ_CALLCONV opj_calloc(size_t _NumOfElements, size_t _SizeOfElements); -#else -/* prevent assertion on overflow for MSVC */ -#ifdef _MSC_VER -#define opj_calloc(num, size) ((size_t)(num) != 0 && (size_t)(num) >= (size_t)-0x100 / (size_t)(size) ? NULL : calloc(num, size)) -#else -#define opj_calloc(num, size) calloc(num, size) -#endif -#endif +void * opj_calloc(size_t numOfElements, size_t sizeOfElements); /** Allocate memory aligned to a 16 byte boundary @param size Bytes to allocate @return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available */ -/* FIXME: These should be set with cmake tests, but we're currently not requiring use of cmake */ -#ifdef _WIN32 - /* Someone should tell the mingw people that their malloc.h ought to provide _mm_malloc() */ - #ifdef __GNUC__ - #include - #define HAVE_MM_MALLOC - #else /* MSVC, Intel C++ */ - #include - #ifdef _mm_malloc - #define HAVE_MM_MALLOC - #endif - #endif -#else /* Not _WIN32 */ - #if defined(__sun) - #define HAVE_MEMALIGN - #elif defined(__FreeBSD__) - #define HAVE_POSIX_MEMALIGN - /* Linux x86_64 and OSX always align allocations to 16 bytes */ - #elif !defined(__amd64__) && !defined(__APPLE__) && !defined(_AIX) - #define HAVE_MEMALIGN - #include - #endif -#endif - -#define opj_aligned_malloc(size) malloc(size) -#define opj_aligned_free(m) free(m) - -#ifdef HAVE_MM_MALLOC - #undef opj_aligned_malloc - #define opj_aligned_malloc(size) _mm_malloc(size, 16) - #undef opj_aligned_free - #define opj_aligned_free(m) _mm_free(m) -#endif - -#ifdef HAVE_MEMALIGN - extern void* memalign(size_t, size_t); - #undef opj_aligned_malloc - #define opj_aligned_malloc(size) memalign(16, (size)) - #undef opj_aligned_free - #define opj_aligned_free(m) free(m) -#endif - -#ifdef HAVE_POSIX_MEMALIGN - #undef opj_aligned_malloc - extern int posix_memalign(void**, size_t, size_t); - - static INLINE void* __attribute__ ((malloc)) opj_aligned_malloc(size_t size){ - void* mem = NULL; - posix_memalign(&mem, 16, size); - return mem; - } - #undef opj_aligned_free - #define opj_aligned_free(m) free(m) -#endif - -#ifdef ALLOC_PERF_OPT - #undef opj_aligned_malloc - #define opj_aligned_malloc(size) opj_malloc(size) - #undef opj_aligned_free - #define opj_aligned_free(m) opj_free(m) -#endif +void * opj_aligned_malloc(size_t size); +void * opj_aligned_realloc(void *ptr, size_t size); +void opj_aligned_free(void* ptr); /** Reallocate memory blocks. @@ -151,28 +77,15 @@ Reallocate memory blocks. @param s New size in bytes @return Returns a void pointer to the reallocated (and possibly moved) memory block */ -#ifdef ALLOC_PERF_OPT -void * OPJ_CALLCONV opj_realloc(void * m, size_t s); -#else -/* prevent assertion on overflow for MSVC */ -#ifdef _MSC_VER -#define opj_realloc(m, s) ((size_t)(s) >= (size_t)-0x100 ? NULL : realloc(m, s)) -#else -#define opj_realloc(m, s) realloc(m, s) -#endif -#endif +void * opj_realloc(void * m, size_t s); /** Deallocates or frees a memory block. @param m Previously allocated memory block to be freed */ -#ifdef ALLOC_PERF_OPT -void OPJ_CALLCONV opj_free(void * m); -#else -#define opj_free(m) free(m) -#endif +void opj_free(void * m); -#ifdef __GNUC__ +#if defined(__GNUC__) && !defined(OPJ_SKIP_POISON) #pragma GCC poison malloc calloc realloc free #endif diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 2fccff1c..6eeb211e 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -626,7 +626,7 @@ void opj_tcd_destroy(opj_tcd_t *tcd) { OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) { if ((l_tilec->data == 00) || ((l_tilec->data_size_needed > l_tilec->data_size) && (l_tilec->ownsData == OPJ_FALSE))) { - l_tilec->data = (OPJ_INT32 *) opj_malloc(l_tilec->data_size_needed); + l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed); if (! l_tilec->data ) { return OPJ_FALSE; } @@ -635,18 +635,15 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) l_tilec->ownsData = OPJ_TRUE; } else if (l_tilec->data_size_needed > l_tilec->data_size) { - OPJ_INT32 * new_data = (OPJ_INT32 *) opj_realloc(l_tilec->data, l_tilec->data_size_needed); - /* opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to handle tile datan"); */ - /* fprintf(stderr, "Not enough memory to handle tile data"); */ - if (! new_data) { - opj_free(l_tilec->data); - l_tilec->data = NULL; + /* We don't need to keep old data */ + opj_aligned_free(l_tilec->data); + l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed); + if (! l_tilec->data ) { l_tilec->data_size = 0; l_tilec->data_size_needed = 0; l_tilec->ownsData = OPJ_FALSE; return OPJ_FALSE; } - l_tilec->data = new_data; /*fprintf(stderr, "tReallocate data of tilec (int): from %d to %d x OPJ_UINT32n", l_tilec->data_size, l_data_size);*/ l_tilec->data_size = l_tilec->data_size_needed; l_tilec->ownsData = OPJ_TRUE; @@ -874,7 +871,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, (OPJ_INT32) (numbps - l_step_size->expn)))) * fraction; l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits - 1; /* WHY -1 ? */ - if (! l_band->precincts) { + if (!l_band->precincts && (l_nb_precincts > 0U)) { l_band->precincts = (opj_tcd_precinct_t *) opj_malloc( /*3 * */ l_nb_precinct_size); if (! l_band->precincts) { return OPJ_FALSE; @@ -933,7 +930,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, /*fprintf(stderr, "\t\t\t\t precinct_cw = %d x recinct_ch = %d\n",l_current_precinct->cw, l_current_precinct->ch); */ l_nb_code_blocks_size = l_nb_code_blocks * (OPJ_UINT32)sizeof_block; - if (! l_current_precinct->cblks.blocks) { + if (!l_current_precinct->cblks.blocks && (l_nb_code_blocks > 0U)) { l_current_precinct->cblks.blocks = opj_malloc(l_nb_code_blocks_size); if (! l_current_precinct->cblks.blocks ) { return OPJ_FALSE; @@ -1521,7 +1518,7 @@ static void opj_tcd_free_tile(opj_tcd_t *p_tcd) } if (l_tile_comp->ownsData && l_tile_comp->data) { - opj_free(l_tile_comp->data); + opj_aligned_free(l_tile_comp->data); l_tile_comp->data = 00; l_tile_comp->ownsData = 0; l_tile_comp->data_size = 0;