From ee130e23066251776dc64529a5c0d885934dee78 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 26 Mar 2014 11:52:01 +0000 Subject: [PATCH] [trunk] Add small utilities to extract JP2 files from PDF --- tests/CMakeLists.txt | 1 + tests/pdf2jp2.c | 127 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 tests/pdf2jp2.c diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a62ceaf9..64f63e83 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -100,3 +100,4 @@ if(BUILD_JPIP) endif() add_executable(ppm2rgb3 ppm2rgb3.c) +#add_executable(pdf2jp2 pdf2jp2.c) diff --git a/tests/pdf2jp2.c b/tests/pdf2jp2.c new file mode 100644 index 00000000..830cbf4e --- /dev/null +++ b/tests/pdf2jp2.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2014, Mathieu Malaterre + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Extract all JP2 files contained within a PDF file. + * + * Technically you could simply use mutool, eg: + * + * $ mutool show -be -o obj58.jp2 Bug691816.pdf 58 + * + * to extract a given JP2 file from within a PDF + * However it happens sometimes that the PDF is itself corrupted, this tools is + * a lame PDF parser which only extract stream contained in JPXDecode box + * only work on linux since I need memmem function + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ +#define NUMJP2 32 + int i, c = 0; + long offets[NUMJP2]; + char buffer[512]; +#define BUFLEN 4096 + int cont = 1; + FILE *f; + size_t nread; + char haystack[BUFLEN]; + const char needle[] = "JPXDecode"; + + const size_t nlen = strlen( needle ); + const size_t flen = BUFLEN - nlen; + char *fpos = haystack + nlen; + const char *filename; + if( argc < 2 ) return 1; + + filename = argv[1]; + + memset( haystack, 0, nlen ); + + f = fopen( filename, "rb" ); + while( cont ) + { + const char *ret; + size_t hlen; + nread = fread(fpos, 1, flen, f); + hlen = nlen + nread; + ret = memmem( haystack, hlen, needle, nlen); + if( ret ) + { + const long cpos = ftell(f); + const ptrdiff_t diff = ret - haystack; + assert( diff >= 0 ); + /*fprintf( stdout, "Found it: %lx\n", (ptrdiff_t)cpos - (ptrdiff_t)hlen + diff);*/ + offets[c++] = (ptrdiff_t)cpos - (ptrdiff_t)hlen + diff; + } + cont = (nread == flen); + memcpy( haystack, haystack + nread, nlen ); + } + + assert( feof( f ) ); + + for( i = 0; i < c; ++i ) + { + int s, len = 0; + char *r; + const int ret = fseek(f, offets[i], SEEK_SET); + assert( ret == 0 ); + r = fgets(buffer, sizeof(buffer), f); + assert( r ); + /*fprintf( stderr, "DEBUG: %s", r );*/ + s = sscanf(r, "JPXDecode]/Length %d/Width %*d/BitsPerComponent %*d/Height %*d", &len); + assert( s == 1 ); + { + FILE *jp2; + int j; + char jp2fn[512]; + sprintf( jp2fn, "%s.%d.jp2", filename, i ); + jp2 = fopen( jp2fn, "wb" ); + for( j = 0; j < len; ++j ) + { + int v = fgetc(f); + int ret2 = fputc(v, jp2); + assert( ret2 != EOF ); + } + fclose( jp2 ); +#if 0 + /* TODO need to check we reached endstream */ + r = fgets(buffer, sizeof(buffer), f); + fprintf( stderr, "DEBUG: [%s]", r ); + r = fgets(buffer, sizeof(buffer), f); + fprintf( stderr, "DEBUG: [%s]", r ); +#endif + } + } + fclose(f); + + return 0; +}