Merge pull request #550 from mayeut/msvc-emul

Use __emul under msvc x86 for fast 64 = 32 * 32
2015-07-27 20:14:18 +02:00 · 2015-07-27 20:14:18 +02:00 · 5b66156be4
commit 5b66156be4
parent 45ccf501f0 b0035538af
2 changed files with 14 additions and 2 deletions
--- a/src/lib/openjp2/opj_includes.h
+++ b/src/lib/openjp2/opj_includes.h
@@ -162,12 +162,16 @@ static INLINE long opj_lrintf(float f) {
 }
 #endif

-
-
 #if defined(_MSC_VER) && (_MSC_VER < 1400)
 	#define vsnprintf _vsnprintf
 #endif

+/* MSVC x86 is really bad at doing int64 = int32 * int32 on its own. Use intrinsic. */
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+#	include <intrin.h>
+#	pragma intrinsic(__emul)
+#endif
+
 #include "opj_inttypes.h"
 #include "opj_clock.h"
 #include "opj_malloc.h"
--- a/src/lib/openjp2/opj_intmath.h
+++ b/src/lib/openjp2/opj_intmath.h
@@ -184,7 +184,11 @@ Multiply two fixed-precision rational numbers.
 @return Returns a * b
 */
 static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+	OPJ_INT64 temp = __emul(a, b);
+#else
 	OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ;
+#endif
 	temp += 4096;
 	assert((temp >> 13) <= (OPJ_INT64)0x7FFFFFFF);
 	assert((temp >> 13) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1));
@@ -192,7 +196,11 @@ static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) {
 }

 static INLINE OPJ_INT32 opj_int_fix_mul_t1(OPJ_INT32 a, OPJ_INT32 b) {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+	OPJ_INT64 temp = __emul(a, b);
+#else
 	OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ;
+#endif
 	temp += 4096;
 	assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) <= (OPJ_INT64)0x7FFFFFFF);
 	assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1));