diff --git a/src/physfs.c b/src/physfs.c index 7fd4456..ef621e8 100644 --- a/src/physfs.c +++ b/src/physfs.c @@ -1433,15 +1433,60 @@ char *__PHYSFS_strdup(const char *str) } /* __PHYSFS_strdup */ -PHYSFS_uint32 __PHYSFS_hashString(const char *str, size_t len) +PHYSFS_uint32 __PHYSFS_hashString(const char *str) { PHYSFS_uint32 hash = 5381; - while (len--) - hash = ((hash << 5) + hash) ^ *(str++); + while (1) + { + const char ch = *(str++); + if (ch == 0) + break; + hash = ((hash << 5) + hash) ^ ch; + } /* while */ return hash; } /* __PHYSFS_hashString */ +PHYSFS_uint32 __PHYSFS_hashStringCaseFold(const char *str) +{ + PHYSFS_uint32 hash = 5381; + while (1) + { + const PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&str); + if (cp == 0) + break; + else + { + PHYSFS_uint32 folded[3]; + const int numbytes = (int) (PHYSFS_caseFold(cp, folded) * sizeof (PHYSFS_uint32)); + const char *bytes = (const char *) folded; + int i; + for (i = 0; i < numbytes; i++) + hash = ((hash << 5) + hash) ^ *(bytes++); + } /* else */ + } /* while */ + + return hash; +} /* __PHYSFS_hashStringCaseFold */ + + +PHYSFS_uint32 __PHYSFS_hashStringCaseFoldUSAscii(const char *str) +{ + PHYSFS_uint32 hash = 5381; + while (1) + { + char ch = *(str++); + if (ch == 0) + break; + else if ((ch >= 'A') && (ch <= 'Z')) + ch -= ('A' - 'a'); + + hash = ((hash << 5) + hash) ^ ch; + } /* while */ + return hash; +} /* __PHYSFS_hashStringCaseFoldUSAscii */ + + /* MAKE SURE you hold stateLock before calling this! */ static int doRegisterArchiver(const PHYSFS_Archiver *_archiver) { @@ -3229,7 +3274,7 @@ static void setDefaultAllocator(void) } /* setDefaultAllocator */ -int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen) +int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen, const int case_sensitive, const int only_usascii) { static char rootpath[2] = { '/', '\0' }; size_t alloclen; @@ -3237,6 +3282,8 @@ int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen) assert(entrylen >= sizeof (__PHYSFS_DirTreeEntry)); memset(dt, '\0', sizeof (*dt)); + dt->case_sensitive = case_sensitive; + dt->only_usascii = only_usascii; dt->root = (__PHYSFS_DirTreeEntry *) allocator.Malloc(entrylen); BAIL_IF(!dt->root, PHYSFS_ERR_OUT_OF_MEMORY, 0); @@ -3257,9 +3304,10 @@ int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen) } /* __PHYSFS_DirTreeInit */ -static inline PHYSFS_uint32 hashPathName(__PHYSFS_DirTree *dt, const char *name) +static PHYSFS_uint32 hashPathName(__PHYSFS_DirTree *dt, const char *name) { - return __PHYSFS_hashString(name, strlen(name)) % dt->hashBuckets; + const PHYSFS_uint32 hashval = dt->case_sensitive ? __PHYSFS_hashString(name) : dt->only_usascii ? __PHYSFS_hashStringCaseFoldUSAscii(name) : __PHYSFS_hashStringCaseFold(name); + return hashval % dt->hashBuckets; } /* hashPathName */ @@ -3320,6 +3368,7 @@ void *__PHYSFS_DirTreeAdd(__PHYSFS_DirTree *dt, char *name, const int isdir) /* Find the __PHYSFS_DirTreeEntry for a path in platform-independent notation. */ void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path) { + const int cs = dt->case_sensitive; PHYSFS_uint32 hashval; __PHYSFS_DirTreeEntry *prev = NULL; __PHYSFS_DirTreeEntry *retval; @@ -3330,7 +3379,8 @@ void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path) hashval = hashPathName(dt, path); for (retval = dt->hash[hashval]; retval; retval = retval->hashnext) { - if (strcmp(retval->name, path) == 0) + const int cmp = cs ? strcmp(retval->name, path) : PHYSFS_utf8stricmp(retval->name, path); + if (cmp == 0) { if (prev != NULL) /* move this to the front of the list */ { diff --git a/src/physfs_archiver_7z.c b/src/physfs_archiver_7z.c index 501c4f8..44be3c9 100644 --- a/src/physfs_archiver_7z.c +++ b/src/physfs_archiver_7z.c @@ -185,7 +185,7 @@ static int szipLoadEntries(SZIPinfo *info) { int retval = 0; - if (__PHYSFS_DirTreeInit(&info->tree, sizeof (SZIPentry))) + if (__PHYSFS_DirTreeInit(&info->tree, sizeof (SZIPentry), 1, 0)) { const PHYSFS_uint32 count = info->db.NumFiles; PHYSFS_uint32 i; diff --git a/src/physfs_archiver_grp.c b/src/physfs_archiver_grp.c index 758475e..9a2978a 100644 --- a/src/physfs_archiver_grp.c +++ b/src/physfs_archiver_grp.c @@ -76,7 +76,7 @@ static void *GRP_openArchive(PHYSFS_Io *io, const char *name, BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, &count, sizeof(count)), NULL); count = PHYSFS_swapULE32(count); - unpkarc = UNPK_openArchive(io); + unpkarc = UNPK_openArchive(io, 0, 1); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!grpLoadEntries(io, count, unpkarc)) diff --git a/src/physfs_archiver_hog.c b/src/physfs_archiver_hog.c index 0fc1520..a818478 100644 --- a/src/physfs_archiver_hog.c +++ b/src/physfs_archiver_hog.c @@ -130,7 +130,7 @@ static void *HOG_openArchive(PHYSFS_Io *io, const char *name, *claimed = 1; - unpkarc = UNPK_openArchive(io); + unpkarc = UNPK_openArchive(io, 0, 1); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!(hog1 ? hog1LoadEntries(io, unpkarc) : hog2LoadEntries(io, unpkarc))) diff --git a/src/physfs_archiver_iso9660.c b/src/physfs_archiver_iso9660.c index 965c83f..6d68150 100644 --- a/src/physfs_archiver_iso9660.c +++ b/src/physfs_archiver_iso9660.c @@ -346,7 +346,8 @@ static void *ISO9660_openArchive(PHYSFS_Io *io, const char *filename, if (!parseVolumeDescriptor(io, &rootpos, &len, &joliet, claimed)) return NULL; - unpkarc = UNPK_openArchive(io); + /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */ + unpkarc = UNPK_openArchive(io, 1, 0); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!iso9660LoadEntries(io, joliet, "", rootpos, rootpos + len, unpkarc)) diff --git a/src/physfs_archiver_mvl.c b/src/physfs_archiver_mvl.c index 78b59f1..7a5c432 100644 --- a/src/physfs_archiver_mvl.c +++ b/src/physfs_archiver_mvl.c @@ -70,7 +70,7 @@ static void *MVL_openArchive(PHYSFS_Io *io, const char *name, BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, &count, sizeof(count)), NULL); count = PHYSFS_swapULE32(count); - unpkarc = UNPK_openArchive(io); + unpkarc = UNPK_openArchive(io, 0, 1); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!mvlLoadEntries(io, count, unpkarc)) diff --git a/src/physfs_archiver_qpak.c b/src/physfs_archiver_qpak.c index 15a5f2d..ddca271 100644 --- a/src/physfs_archiver_qpak.c +++ b/src/physfs_archiver_qpak.c @@ -86,7 +86,8 @@ static void *QPAK_openArchive(PHYSFS_Io *io, const char *name, BAIL_IF_ERRPASS(!io->seek(io, pos), NULL); - unpkarc = UNPK_openArchive(io); + /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */ + unpkarc = UNPK_openArchive(io, 1, 0); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!qpakLoadEntries(io, count, unpkarc)) diff --git a/src/physfs_archiver_slb.c b/src/physfs_archiver_slb.c index 4fc28d4..7a0a448 100644 --- a/src/physfs_archiver_slb.c +++ b/src/physfs_archiver_slb.c @@ -94,7 +94,8 @@ static void *SLB_openArchive(PHYSFS_Io *io, const char *name, /* seek to the table of contents */ BAIL_IF_ERRPASS(!io->seek(io, tocPos), NULL); - unpkarc = UNPK_openArchive(io); + /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */ + unpkarc = UNPK_openArchive(io, 1, 0); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!slbLoadEntries(io, count, unpkarc)) diff --git a/src/physfs_archiver_unpacked.c b/src/physfs_archiver_unpacked.c index 575efef..fbb12a0 100644 --- a/src/physfs_archiver_unpacked.c +++ b/src/physfs_archiver_unpacked.c @@ -285,12 +285,12 @@ void *UNPK_addEntry(void *opaque, char *name, const int isdir, } /* UNPK_addEntry */ -void *UNPK_openArchive(PHYSFS_Io *io) +void *UNPK_openArchive(PHYSFS_Io *io, const int case_sensitive, const int only_usascii) { UNPKinfo *info = (UNPKinfo *) allocator.Malloc(sizeof (UNPKinfo)); BAIL_IF(!info, PHYSFS_ERR_OUT_OF_MEMORY, NULL); - if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (UNPKentry))) + if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (UNPKentry), case_sensitive, only_usascii)) { allocator.Free(info); return NULL; diff --git a/src/physfs_archiver_vdf.c b/src/physfs_archiver_vdf.c index 99bbb2a..6d3a23f 100644 --- a/src/physfs_archiver_vdf.c +++ b/src/physfs_archiver_vdf.c @@ -129,7 +129,8 @@ static void *VDF_openArchive(PHYSFS_Io *io, const char *name, BAIL_IF_ERRPASS(!io->seek(io, rootCatOffset), NULL); - unpkarc = UNPK_openArchive(io); + /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */ + unpkarc = UNPK_openArchive(io, 1, 0); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!vdfLoadEntries(io, count, vdfDosTimeToEpoch(timestamp), unpkarc)) diff --git a/src/physfs_archiver_wad.c b/src/physfs_archiver_wad.c index b094c5b..d3ae045 100644 --- a/src/physfs_archiver_wad.c +++ b/src/physfs_archiver_wad.c @@ -95,7 +95,7 @@ static void *WAD_openArchive(PHYSFS_Io *io, const char *name, BAIL_IF_ERRPASS(!io->seek(io, directoryOffset), 0); - unpkarc = UNPK_openArchive(io); + unpkarc = UNPK_openArchive(io, 0, 1); BAIL_IF_ERRPASS(!unpkarc, NULL); if (!wadLoadEntries(io, count, unpkarc)) diff --git a/src/physfs_archiver_zip.c b/src/physfs_archiver_zip.c index 9972628..296cf26 100644 --- a/src/physfs_archiver_zip.c +++ b/src/physfs_archiver_zip.c @@ -1482,7 +1482,7 @@ static void *ZIP_openArchive(PHYSFS_Io *io, const char *name, if (!zip_parse_end_of_central_dir(info, &dstart, &cdir_ofs, &count)) goto ZIP_openarchive_failed; - else if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (ZIPentry))) + else if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (ZIPentry), 1, 0)) goto ZIP_openarchive_failed; root = (ZIPentry *) info->tree.root; diff --git a/src/physfs_internal.h b/src/physfs_internal.h index 9c60fbb..ecf5f10 100644 --- a/src/physfs_internal.h +++ b/src/physfs_internal.h @@ -322,7 +322,18 @@ char *__PHYSFS_strdup(const char *str); /* * Give a hash value for a C string (uses djb's xor hashing algorithm). */ -PHYSFS_uint32 __PHYSFS_hashString(const char *str, size_t len); +PHYSFS_uint32 __PHYSFS_hashString(const char *str); + +/* + * Give a hash value for a C string (uses djb's xor hashing algorithm), case folding as it goes. + */ +PHYSFS_uint32 __PHYSFS_hashStringCaseFold(const char *str); + +/* + * Give a hash value for a C string (uses djb's xor hashing algorithm), case folding as it goes, + * assuming that this is only US-ASCII chars (one byte per char, only 'A' through 'Z' need folding). + */ +PHYSFS_uint32 __PHYSFS_hashStringCaseFoldUSAscii(const char *str); /* @@ -358,9 +369,10 @@ int __PHYSFS_readAll(PHYSFS_Io *io, void *buf, const size_t len); /* These are shared between some archivers. */ +/* LOTS of legacy formats that only use US ASCII, not actually UTF-8, so let them optimize here. */ +void *UNPK_openArchive(PHYSFS_Io *io, const int case_sensitive, const int only_usascii); void UNPK_abandonArchive(void *opaque); void UNPK_closeArchive(void *opaque); -void *UNPK_openArchive(PHYSFS_Io *io); void *UNPK_addEntry(void *opaque, char *name, const int isdir, const PHYSFS_sint64 ctime, const PHYSFS_sint64 mtime, const PHYSFS_uint64 pos, const PHYSFS_uint64 len); @@ -392,10 +404,13 @@ typedef struct __PHYSFS_DirTree __PHYSFS_DirTreeEntry **hash; /* all entries hashed for fast lookup. */ size_t hashBuckets; /* number of buckets in hash. */ size_t entrylen; /* size in bytes of entries (including subclass). */ + int case_sensitive; /* non-zero to treat entries as case-sensitive in DirTreeFind */ + int only_usascii; /* non-zero to treat paths as US ASCII only (one byte per char, only 'A' through 'Z' are considered for case folding). */ } __PHYSFS_DirTree; -int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen); +/* LOTS of legacy formats that only use US ASCII, not actually UTF-8, so let them optimize here. */ +int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen, const int case_sensitive, const int only_usascii); void *__PHYSFS_DirTreeAdd(__PHYSFS_DirTree *dt, char *name, const int isdir); void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path); PHYSFS_EnumerateCallbackResult __PHYSFS_DirTreeEnumerate(void *opaque, @@ -725,6 +740,11 @@ int __PHYSFS_platformGrabMutex(void *mutex); */ void __PHYSFS_platformReleaseMutex(void *mutex); + +/* !!! FIXME: move to public API? */ +PHYSFS_uint32 __PHYSFS_utf8codepoint(const char **_str); + + #if PHYSFS_HAVE_PRAGMA_VISIBILITY #pragma GCC visibility pop #endif diff --git a/src/physfs_unicode.c b/src/physfs_unicode.c index 52c997c..bab4f8b 100644 --- a/src/physfs_unicode.c +++ b/src/physfs_unicode.c @@ -21,8 +21,8 @@ /* * This may not be the best value, but it's one that isn't represented * in Unicode (0x10FFFF is the largest codepoint value). We return this - * value from utf8codepoint() if there's bogus bits in the - * stream. utf8codepoint() will turn this value into something + * value from __PHYSFS_utf8codepoint() if there's bogus bits in the + * stream. __PHYSFS_utf8codepoint() will turn this value into something * reasonable (like a question mark), for text that wants to try to recover, * whereas utf8valid() will use the value to determine if a string has bad * bits. @@ -35,7 +35,7 @@ */ #define UNICODE_BOGUS_CHAR_CODEPOINT '?' -static PHYSFS_uint32 utf8codepoint(const char **_str) +PHYSFS_uint32 __PHYSFS_utf8codepoint(const char **_str) { const char *str = *_str; PHYSFS_uint32 retval = 0; @@ -188,6 +188,11 @@ static PHYSFS_uint32 utf8codepoint(const char **_str) } /* else if */ return UNICODE_BOGUS_CHAR_VALUE; +} /* __PHYSFS_utf8codepoint */ + +static inline PHYSFS_uint32 utf8codepoint(const char **_str) +{ + return __PHYSFS_utf8codepoint(_str); } /* utf8codepoint */ static PHYSFS_uint32 utf16codepoint(const PHYSFS_uint16 **_str) @@ -238,7 +243,7 @@ void PHYSFS_utf8ToUcs4(const char *src, PHYSFS_uint32 *dst, PHYSFS_uint64 len) len -= sizeof (PHYSFS_uint32); /* save room for null char. */ while (len >= sizeof (PHYSFS_uint32)) { - PHYSFS_uint32 cp = utf8codepoint(&src); + PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src); if (cp == 0) break; else if (cp == UNICODE_BOGUS_CHAR_VALUE) @@ -256,7 +261,7 @@ void PHYSFS_utf8ToUcs2(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len) len -= sizeof (PHYSFS_uint16); /* save room for null char. */ while (len >= sizeof (PHYSFS_uint16)) { - PHYSFS_uint32 cp = utf8codepoint(&src); + PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src); if (cp == 0) break; else if (cp == UNICODE_BOGUS_CHAR_VALUE) @@ -278,7 +283,7 @@ void PHYSFS_utf8ToUtf16(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len) len -= sizeof (PHYSFS_uint16); /* save room for null char. */ while (len >= sizeof (PHYSFS_uint16)) { - PHYSFS_uint32 cp = utf8codepoint(&src); + PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src); if (cp == 0) break; else if (cp == UNICODE_BOGUS_CHAR_VALUE)