Merge pull request #636 from uclouvain/opj_malloc-625

Update allocation functions
Fix #625 
Fix #624
Fix #635
This commit is contained in:
Matthieu Darbois 2015-10-18 03:14:55 +02:00
commit 05b3afd28f
9 changed files with 213 additions and 114 deletions

View File

@ -225,6 +225,16 @@ CHECK_INCLUDE_FILE("unistd.h" HAVE_UNISTD_H)
include(TestLargeFiles)
OPJ_TEST_LARGE_FILES(OPJ_HAVE_LARGEFILES)
# Allocating Aligned Memory Blocks
include(CheckIncludeFiles)
check_include_files(malloc.h HAVE_MALLOC_H)
include(CheckSymbolExists)
# _aligned_alloc https://msdn.microsoft.com/en-us/library/8z34s9c6.aspx
check_symbol_exists(_aligned_malloc malloc.h HAVE__ALIGNED_MALLOC)
# posix_memalign
check_symbol_exists(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN)
# memalign (obsolete)
check_symbol_exists(memalign malloc.h HAVE_MEMALIGN)
#-----------------------------------------------------------------------------
# Build Library
if(BUILD_JPIP_SERVER)

View File

@ -50,6 +50,7 @@ set(OPENJPEG_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/opj_codec.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_includes.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_intmath.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.c
${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_stdint.h
)

View File

@ -568,8 +568,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres
OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
h.mem = (OPJ_INT32*)
opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32));
if (numres == 1U) {
return OPJ_TRUE;
}
h.mem = (OPJ_INT32*)opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32));
if (! h.mem){
/* FIXME event manager error callback */
return OPJ_FALSE;

View File

@ -9947,6 +9947,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
OPJ_UINT32 l_nb_tiles;
OPJ_UINT32 l_max_tile_size = 0, l_current_tile_size;
OPJ_BYTE * l_current_data = 00;
OPJ_BOOL l_reuse_data = OPJ_FALSE;
opj_tcd_t* p_tcd = 00;
/* preconditions */
@ -9957,6 +9958,17 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
p_tcd = p_j2k->m_tcd;
l_nb_tiles = p_j2k->m_cp.th * p_j2k->m_cp.tw;
if (l_nb_tiles == 1) {
l_reuse_data = OPJ_TRUE;
#ifdef __SSE__
for (j=0;j<p_j2k->m_tcd->image->numcomps;++j) {
opj_image_comp_t * l_img_comp = p_tcd->image->comps + j;
if (((size_t)l_img_comp->data & 0xFU) != 0U) { /* tile data shall be aligned on 16 bytes */
l_reuse_data = OPJ_FALSE;
}
}
#endif
}
for (i=0;i<l_nb_tiles;++i) {
if (! opj_j2k_pre_write_tile(p_j2k,i,p_stream,p_manager)) {
if (l_current_data) {
@ -9969,7 +9981,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
/* otherwise, allocate the data */
for (j=0;j<p_j2k->m_tcd->image->numcomps;++j) {
opj_tcd_tilecomp_t* l_tilec = p_tcd->tcd_image->tiles->comps + j;
if (l_nb_tiles == 1) {
if (l_reuse_data) {
opj_image_comp_t * l_img_comp = p_tcd->image->comps + j;
l_tilec->data = l_img_comp->data;
l_tilec->ownsData = OPJ_FALSE;
@ -9984,7 +9996,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
}
}
l_current_tile_size = opj_tcd_get_encoded_tile_size(p_j2k->m_tcd);
if (l_nb_tiles > 1) {
if (!l_reuse_data) {
if (l_current_tile_size > l_max_tile_size) {
OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, l_current_tile_size);
if (! l_new_current_data) {

View File

@ -81,6 +81,10 @@ void opj_mct_encode(
{
OPJ_SIZE_T i;
const OPJ_SIZE_T len = n;
/* buffer are aligned on 16 bytes */
assert( ((size_t)c0 & 0xf) == 0 );
assert( ((size_t)c1 & 0xf) == 0 );
assert( ((size_t)c2 & 0xf) == 0 );
for(i = 0; i < (len & ~3U); i += 4) {
__m128i y, u, v;

View File

@ -17,6 +17,17 @@
#cmakedefine _FILE_OFFSET_BITS @_FILE_OFFSET_BITS@
#cmakedefine OPJ_HAVE_FSEEKO @OPJ_HAVE_FSEEKO@
/* find whether or not have <malloc.h> */
#cmakedefine HAVE_MALLOC_H
/* check if function `aligned_alloc` exists */
#cmakedefine HAVE_ALIGNED_ALLOC
/* check if function `_aligned_malloc` exists */
#cmakedefine HAVE__ALIGNED_MALLOC
/* check if function `memalign` exists */
#cmakedefine HAVE_MEMALIGN
/* check if function `posix_memalign` exists */
#cmakedefine HAVE_POSIX_MEMALIGN
/* Byte order. */
/* All compilers that support Mac OS X define either __BIG_ENDIAN__ or
__LITTLE_ENDIAN__ to match the endianness of the architecture being

View File

@ -0,0 +1,149 @@
/*
* The copyright in this software is being made available under the 2-clauses
* BSD License, included below. This software may be subject to other third
* party and contributor rights, including patent rights, and no such rights
* are granted under this license.
*
* Copyright (c) 2015, Mathieu Malaterre <mathieu.malaterre@gmail.com>
* Copyright (c) 2015, Matthieu Darbois
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#define OPJ_SKIP_POISON
#include "opj_includes.h"
static INLINE void *opj_aligned_alloc_n(size_t alignment, size_t size)
{
void* ptr;
/* alignment shall be power of 2 */
assert( (alignment != 0U) && ((alignment & (alignment - 1U)) == 0U));
if (size == 0U) { /* prevent implementation defined behavior of realloc */
return NULL;
}
#if defined(HAVE_POSIX_MEMALIGN)
/* aligned_alloc requires c11, restrict to posix_memalign for now. Quote:
* This function was introduced in POSIX 1003.1d. Although this function is
* superseded by aligned_alloc, it is more portable to older POSIX systems
* that do not support ISO C11. */
if (posix_memalign (&ptr, alignment, size))
{
ptr = NULL;
}
/* older linux */
#elif defined(HAVE_MEMALIGN)
ptr = memalign( alignment, size );
/* _MSC_VER */
#elif defined(HAVE__ALIGNED_MALLOC)
ptr = _aligned_malloc(size, alignment);
#else
/* TODO: _mm_malloc(x,y) */
#error missing aligned alloc function
#endif
return ptr;
}
static INLINE void *opj_aligned_realloc_n(void *ptr, size_t alignment, size_t new_size)
{
void *r_ptr;
/* alignment shall be power of 2 */
assert( (alignment != 0U) && ((alignment & (alignment - 1U)) == 0U));
if (new_size == 0U) { /* prevent implementation defined behavior of realloc */
return NULL;
}
/* no portable aligned realloc */
#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
/* glibc doc states one can mixed aligned malloc with realloc */
r_ptr = realloc( ptr, new_size ); /* fast path */
/* we simply use `size_t` to cast, since we are only interest in binary AND
* operator */
if( ((size_t)r_ptr & (alignment - 1U)) != 0U ) {
/* this is non-trivial to implement a portable aligned realloc, so use a
* simple approach where we do not need a function that return the size of an
* allocated array (eg. _msize on Windows, malloc_size on MacOS,
* malloc_usable_size on systems with glibc) */
void *a_ptr = opj_aligned_alloc_n(alignment, new_size);
if (a_ptr != NULL) {
memcpy(a_ptr, r_ptr, new_size);
}
free( r_ptr );
r_ptr = a_ptr;
}
/* _MSC_VER */
#elif defined(HAVE__ALIGNED_MALLOC)
r_ptr = _aligned_realloc( ptr, new_size, alignment );
#else
/* TODO: _mm_malloc(x,y) */
#error missing aligned realloc function
#endif
return r_ptr;
}
void * opj_malloc(size_t size)
{
if (size == 0U) { /* prevent implementation defined behavior of realloc */
return NULL;
}
return malloc(size);
}
void * opj_calloc(size_t num, size_t size)
{
if (size == 0U) { /* prevent implementation defined behavior of realloc */
return NULL;
}
/* according to C89 standard, num == 0 shall return a valid pointer */
return calloc(num, size);
}
void *opj_aligned_malloc(size_t size)
{
return opj_aligned_alloc_n(16U, size);
}
void * opj_aligned_realloc(void *ptr, size_t size)
{
return opj_aligned_realloc_n(ptr, 16U, size);
}
void opj_aligned_free(void* ptr)
{
#ifdef HAVE__ALIGNED_MALLOC
_aligned_free( ptr );
#else
free( ptr );
#endif
}
void * opj_realloc(void *ptr, size_t new_size)
{
if (new_size == 0U) { /* prevent implementation defined behavior of realloc */
return NULL;
}
return realloc(ptr, new_size);
}
void opj_free(void *ptr)
{
free(ptr);
}

View File

@ -31,6 +31,8 @@
*/
#ifndef __OPJ_MALLOC_H
#define __OPJ_MALLOC_H
#include <stddef.h>
/**
@file opj_malloc.h
@brief Internal functions
@ -50,16 +52,7 @@ Allocate an uninitialized memory block
@param size Bytes to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
#ifdef ALLOC_PERF_OPT
void * OPJ_CALLCONV opj_malloc(size_t size);
#else
/* prevent assertion on overflow for MSVC */
#ifdef _MSC_VER
#define opj_malloc(size) ((size_t)(size) >= (size_t)-0x100 ? NULL : malloc(size))
#else
#define opj_malloc(size) malloc(size)
#endif
#endif
void * opj_malloc(size_t size);
/**
Allocate a memory block with elements initialized to 0
@ -67,83 +60,16 @@ Allocate a memory block with elements initialized to 0
@param size Bytes per block to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
#ifdef ALLOC_PERF_OPT
void * OPJ_CALLCONV opj_calloc(size_t _NumOfElements, size_t _SizeOfElements);
#else
/* prevent assertion on overflow for MSVC */
#ifdef _MSC_VER
#define opj_calloc(num, size) ((size_t)(num) != 0 && (size_t)(num) >= (size_t)-0x100 / (size_t)(size) ? NULL : calloc(num, size))
#else
#define opj_calloc(num, size) calloc(num, size)
#endif
#endif
void * opj_calloc(size_t numOfElements, size_t sizeOfElements);
/**
Allocate memory aligned to a 16 byte boundary
@param size Bytes to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
/* FIXME: These should be set with cmake tests, but we're currently not requiring use of cmake */
#ifdef _WIN32
/* Someone should tell the mingw people that their malloc.h ought to provide _mm_malloc() */
#ifdef __GNUC__
#include <mm_malloc.h>
#define HAVE_MM_MALLOC
#else /* MSVC, Intel C++ */
#include <malloc.h>
#ifdef _mm_malloc
#define HAVE_MM_MALLOC
#endif
#endif
#else /* Not _WIN32 */
#if defined(__sun)
#define HAVE_MEMALIGN
#elif defined(__FreeBSD__)
#define HAVE_POSIX_MEMALIGN
/* Linux x86_64 and OSX always align allocations to 16 bytes */
#elif !defined(__amd64__) && !defined(__APPLE__) && !defined(_AIX)
#define HAVE_MEMALIGN
#include <malloc.h>
#endif
#endif
#define opj_aligned_malloc(size) malloc(size)
#define opj_aligned_free(m) free(m)
#ifdef HAVE_MM_MALLOC
#undef opj_aligned_malloc
#define opj_aligned_malloc(size) _mm_malloc(size, 16)
#undef opj_aligned_free
#define opj_aligned_free(m) _mm_free(m)
#endif
#ifdef HAVE_MEMALIGN
extern void* memalign(size_t, size_t);
#undef opj_aligned_malloc
#define opj_aligned_malloc(size) memalign(16, (size))
#undef opj_aligned_free
#define opj_aligned_free(m) free(m)
#endif
#ifdef HAVE_POSIX_MEMALIGN
#undef opj_aligned_malloc
extern int posix_memalign(void**, size_t, size_t);
static INLINE void* __attribute__ ((malloc)) opj_aligned_malloc(size_t size){
void* mem = NULL;
posix_memalign(&mem, 16, size);
return mem;
}
#undef opj_aligned_free
#define opj_aligned_free(m) free(m)
#endif
#ifdef ALLOC_PERF_OPT
#undef opj_aligned_malloc
#define opj_aligned_malloc(size) opj_malloc(size)
#undef opj_aligned_free
#define opj_aligned_free(m) opj_free(m)
#endif
void * opj_aligned_malloc(size_t size);
void * opj_aligned_realloc(void *ptr, size_t size);
void opj_aligned_free(void* ptr);
/**
Reallocate memory blocks.
@ -151,28 +77,15 @@ Reallocate memory blocks.
@param s New size in bytes
@return Returns a void pointer to the reallocated (and possibly moved) memory block
*/
#ifdef ALLOC_PERF_OPT
void * OPJ_CALLCONV opj_realloc(void * m, size_t s);
#else
/* prevent assertion on overflow for MSVC */
#ifdef _MSC_VER
#define opj_realloc(m, s) ((size_t)(s) >= (size_t)-0x100 ? NULL : realloc(m, s))
#else
#define opj_realloc(m, s) realloc(m, s)
#endif
#endif
void * opj_realloc(void * m, size_t s);
/**
Deallocates or frees a memory block.
@param m Previously allocated memory block to be freed
*/
#ifdef ALLOC_PERF_OPT
void OPJ_CALLCONV opj_free(void * m);
#else
#define opj_free(m) free(m)
#endif
void opj_free(void * m);
#ifdef __GNUC__
#if defined(__GNUC__) && !defined(OPJ_SKIP_POISON)
#pragma GCC poison malloc calloc realloc free
#endif

View File

@ -626,7 +626,7 @@ void opj_tcd_destroy(opj_tcd_t *tcd) {
OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec)
{
if ((l_tilec->data == 00) || ((l_tilec->data_size_needed > l_tilec->data_size) && (l_tilec->ownsData == OPJ_FALSE))) {
l_tilec->data = (OPJ_INT32 *) opj_malloc(l_tilec->data_size_needed);
l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed);
if (! l_tilec->data ) {
return OPJ_FALSE;
}
@ -635,18 +635,15 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec)
l_tilec->ownsData = OPJ_TRUE;
}
else if (l_tilec->data_size_needed > l_tilec->data_size) {
OPJ_INT32 * new_data = (OPJ_INT32 *) opj_realloc(l_tilec->data, l_tilec->data_size_needed);
/* opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to handle tile datan"); */
/* fprintf(stderr, "Not enough memory to handle tile data"); */
if (! new_data) {
opj_free(l_tilec->data);
l_tilec->data = NULL;
/* We don't need to keep old data */
opj_aligned_free(l_tilec->data);
l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed);
if (! l_tilec->data ) {
l_tilec->data_size = 0;
l_tilec->data_size_needed = 0;
l_tilec->ownsData = OPJ_FALSE;
return OPJ_FALSE;
}
l_tilec->data = new_data;
/*fprintf(stderr, "tReallocate data of tilec (int): from %d to %d x OPJ_UINT32n", l_tilec->data_size, l_data_size);*/
l_tilec->data_size = l_tilec->data_size_needed;
l_tilec->ownsData = OPJ_TRUE;
@ -874,7 +871,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, (OPJ_INT32) (numbps - l_step_size->expn)))) * fraction;
l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits - 1; /* WHY -1 ? */
if (! l_band->precincts) {
if (!l_band->precincts && (l_nb_precincts > 0U)) {
l_band->precincts = (opj_tcd_precinct_t *) opj_malloc( /*3 * */ l_nb_precinct_size);
if (! l_band->precincts) {
return OPJ_FALSE;
@ -933,7 +930,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
/*fprintf(stderr, "\t\t\t\t precinct_cw = %d x recinct_ch = %d\n",l_current_precinct->cw, l_current_precinct->ch); */
l_nb_code_blocks_size = l_nb_code_blocks * (OPJ_UINT32)sizeof_block;
if (! l_current_precinct->cblks.blocks) {
if (!l_current_precinct->cblks.blocks && (l_nb_code_blocks > 0U)) {
l_current_precinct->cblks.blocks = opj_malloc(l_nb_code_blocks_size);
if (! l_current_precinct->cblks.blocks ) {
return OPJ_FALSE;
@ -1521,7 +1518,7 @@ static void opj_tcd_free_tile(opj_tcd_t *p_tcd)
}
if (l_tile_comp->ownsData && l_tile_comp->data) {
opj_free(l_tile_comp->data);
opj_aligned_free(l_tile_comp->data);
l_tile_comp->data = 00;
l_tile_comp->ownsData = 0;
l_tile_comp->data_size = 0;