Let several archives be case-insensitive.

(Several more probably _should_ be, but I don't have the details on them
at the moment. Fixing those is now just a matter of changing the
case_sensitive argument from 1 to 0!)
This commit is contained in:
Ryan C. Gordon 2022-05-20 17:36:06 -04:00
parent 14691399cd
commit 17b691b0ea
No known key found for this signature in database
GPG Key ID: FA148B892AB48044
14 changed files with 107 additions and 28 deletions

View File

@ -1433,15 +1433,60 @@ char *__PHYSFS_strdup(const char *str)
} /* __PHYSFS_strdup */
/*
 * Hash a string: start from 5381, and for each byte replace the hash with
 *  ((hash << 5) + hash) xor'd with that byte.
 *
 * NOTE(review): this span is a diff rendering without +/- markers, so two
 *  variants appear back to back: one that consumes exactly `len` bytes, and
 *  one that walks `str` until it reads a NUL byte. Presumably the
 *  NUL-terminated variant is the replacement -- confirm against the commit.
 */
PHYSFS_uint32 __PHYSFS_hashString(const char *str, size_t len)
PHYSFS_uint32 __PHYSFS_hashString(const char *str)
{
PHYSFS_uint32 hash = 5381;
/* length-driven variant: mixes in exactly `len` bytes. */
while (len--)
hash = ((hash << 5) + hash) ^ *(str++);
/* terminator-driven variant: stops at (and does not mix in) the NUL byte. */
while (1)
{
const char ch = *(str++);
if (ch == 0)
break;
hash = ((hash << 5) + hash) ^ ch;
} /* while */
return hash;
} /* __PHYSFS_hashString */
/*
 * Hash a NUL-terminated UTF-8 string with djb's xor hash, running every
 *  decoded codepoint through PHYSFS_caseFold() before mixing it in.
 *
 * The folded codepoints are hashed as raw bytes in the order they sit in
 *  memory, so each PHYSFS_uint32 contributes sizeof (PHYSFS_uint32) bytes
 *  (zero bytes included).
 */
PHYSFS_uint32 __PHYSFS_hashStringCaseFold(const char *str)
{
    PHYSFS_uint32 retval = 5381;
    PHYSFS_uint32 codepoint;

    /* decode one codepoint per pass; a zero codepoint ends the string. */
    while ((codepoint = __PHYSFS_utf8codepoint(&str)) != 0)
    {
        /* NOTE(review): assumes PHYSFS_caseFold() never writes more than three codepoints -- confirm. */
        PHYSFS_uint32 folded[3];
        const int total = (int) (PHYSFS_caseFold(codepoint, folded) * sizeof (PHYSFS_uint32));
        const char *bytes = (const char *) folded;
        int idx = 0;

        /* mix in the folded result one byte at a time. */
        while (idx < total)
        {
            retval = (retval + (retval << 5)) ^ bytes[idx];
            idx++;
        } /* while */
    } /* while */

    return retval;
} /* __PHYSFS_hashStringCaseFold */
/*
 * Hash a NUL-terminated string with djb's xor hash, treating it as one byte
 *  per character and lowercasing only 'A' through 'Z' before mixing each
 *  byte in. Every other byte is hashed unchanged.
 */
PHYSFS_uint32 __PHYSFS_hashStringCaseFoldUSAscii(const char *str)
{
    PHYSFS_uint32 retval = 5381;
    const char *ptr;

    for (ptr = str; *ptr != 0; ptr++)
    {
        char ch = *ptr;
        if ((ch >= 'A') && (ch <= 'Z'))
            ch += ('a' - 'A');  /* shift uppercase onto its lowercase twin. */
        retval = (retval + (retval << 5)) ^ ch;
    } /* for */

    return retval;
} /* __PHYSFS_hashStringCaseFoldUSAscii */
/* MAKE SURE you hold stateLock before calling this! */
static int doRegisterArchiver(const PHYSFS_Archiver *_archiver)
{
@ -3229,7 +3274,7 @@ static void setDefaultAllocator(void)
} /* setDefaultAllocator */
int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen)
int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen, const int case_sensitive, const int only_usascii)
{
static char rootpath[2] = { '/', '\0' };
size_t alloclen;
@ -3237,6 +3282,8 @@ int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen)
assert(entrylen >= sizeof (__PHYSFS_DirTreeEntry));
memset(dt, '\0', sizeof (*dt));
dt->case_sensitive = case_sensitive;
dt->only_usascii = only_usascii;
dt->root = (__PHYSFS_DirTreeEntry *) allocator.Malloc(entrylen);
BAIL_IF(!dt->root, PHYSFS_ERR_OUT_OF_MEMORY, 0);
@ -3257,9 +3304,10 @@ int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen)
} /* __PHYSFS_DirTreeInit */
/*
 * Reduce a path name to a bucket index: a string hash of `name`, modulo
 *  dt->hashBuckets.
 *
 * NOTE(review): this span is a diff rendering without +/- markers, so two
 *  signatures and two bodies appear back to back. Presumably the variant
 *  that consults dt->case_sensitive / dt->only_usascii is the replacement
 *  -- confirm against the commit.
 */
static inline PHYSFS_uint32 hashPathName(__PHYSFS_DirTree *dt, const char *name)
static PHYSFS_uint32 hashPathName(__PHYSFS_DirTree *dt, const char *name)
{
/* variant that always hashes the exact bytes of `name`. */
return __PHYSFS_hashString(name, strlen(name)) % dt->hashBuckets;
/* variant that picks the hash from the tree's flags: exact bytes when case_sensitive is set; otherwise the US-ASCII-only fold when only_usascii is set, else the full case fold. */
const PHYSFS_uint32 hashval = dt->case_sensitive ? __PHYSFS_hashString(name) : dt->only_usascii ? __PHYSFS_hashStringCaseFoldUSAscii(name) : __PHYSFS_hashStringCaseFold(name);
return hashval % dt->hashBuckets;
} /* hashPathName */
@ -3320,6 +3368,7 @@ void *__PHYSFS_DirTreeAdd(__PHYSFS_DirTree *dt, char *name, const int isdir)
/* Find the __PHYSFS_DirTreeEntry for a path in platform-independent notation. */
void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path)
{
const int cs = dt->case_sensitive;
PHYSFS_uint32 hashval;
__PHYSFS_DirTreeEntry *prev = NULL;
__PHYSFS_DirTreeEntry *retval;
@ -3330,7 +3379,8 @@ void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path)
hashval = hashPathName(dt, path);
for (retval = dt->hash[hashval]; retval; retval = retval->hashnext)
{
if (strcmp(retval->name, path) == 0)
const int cmp = cs ? strcmp(retval->name, path) : PHYSFS_utf8stricmp(retval->name, path);
if (cmp == 0)
{
if (prev != NULL) /* move this to the front of the list */
{

View File

@ -185,7 +185,7 @@ static int szipLoadEntries(SZIPinfo *info)
{
int retval = 0;
if (__PHYSFS_DirTreeInit(&info->tree, sizeof (SZIPentry)))
if (__PHYSFS_DirTreeInit(&info->tree, sizeof (SZIPentry), 1, 0))
{
const PHYSFS_uint32 count = info->db.NumFiles;
PHYSFS_uint32 i;

View File

@ -76,7 +76,7 @@ static void *GRP_openArchive(PHYSFS_Io *io, const char *name,
BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, &count, sizeof(count)), NULL);
count = PHYSFS_swapULE32(count);
unpkarc = UNPK_openArchive(io);
unpkarc = UNPK_openArchive(io, 0, 1);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!grpLoadEntries(io, count, unpkarc))

View File

@ -130,7 +130,7 @@ static void *HOG_openArchive(PHYSFS_Io *io, const char *name,
*claimed = 1;
unpkarc = UNPK_openArchive(io);
unpkarc = UNPK_openArchive(io, 0, 1);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!(hog1 ? hog1LoadEntries(io, unpkarc) : hog2LoadEntries(io, unpkarc)))

View File

@ -346,7 +346,8 @@ static void *ISO9660_openArchive(PHYSFS_Io *io, const char *filename,
if (!parseVolumeDescriptor(io, &rootpos, &len, &joliet, claimed))
return NULL;
unpkarc = UNPK_openArchive(io);
/* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
unpkarc = UNPK_openArchive(io, 1, 0);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!iso9660LoadEntries(io, joliet, "", rootpos, rootpos + len, unpkarc))

View File

@ -70,7 +70,7 @@ static void *MVL_openArchive(PHYSFS_Io *io, const char *name,
BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, &count, sizeof(count)), NULL);
count = PHYSFS_swapULE32(count);
unpkarc = UNPK_openArchive(io);
unpkarc = UNPK_openArchive(io, 0, 1);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!mvlLoadEntries(io, count, unpkarc))

View File

@ -86,7 +86,8 @@ static void *QPAK_openArchive(PHYSFS_Io *io, const char *name,
BAIL_IF_ERRPASS(!io->seek(io, pos), NULL);
unpkarc = UNPK_openArchive(io);
/* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
unpkarc = UNPK_openArchive(io, 1, 0);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!qpakLoadEntries(io, count, unpkarc))

View File

@ -94,7 +94,8 @@ static void *SLB_openArchive(PHYSFS_Io *io, const char *name,
/* seek to the table of contents */
BAIL_IF_ERRPASS(!io->seek(io, tocPos), NULL);
unpkarc = UNPK_openArchive(io);
/* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
unpkarc = UNPK_openArchive(io, 1, 0);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!slbLoadEntries(io, count, unpkarc))

View File

@ -285,12 +285,12 @@ void *UNPK_addEntry(void *opaque, char *name, const int isdir,
} /* UNPK_addEntry */
void *UNPK_openArchive(PHYSFS_Io *io)
void *UNPK_openArchive(PHYSFS_Io *io, const int case_sensitive, const int only_usascii)
{
UNPKinfo *info = (UNPKinfo *) allocator.Malloc(sizeof (UNPKinfo));
BAIL_IF(!info, PHYSFS_ERR_OUT_OF_MEMORY, NULL);
if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (UNPKentry)))
if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (UNPKentry), case_sensitive, only_usascii))
{
allocator.Free(info);
return NULL;

View File

@ -129,7 +129,8 @@ static void *VDF_openArchive(PHYSFS_Io *io, const char *name,
BAIL_IF_ERRPASS(!io->seek(io, rootCatOffset), NULL);
unpkarc = UNPK_openArchive(io);
/* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
unpkarc = UNPK_openArchive(io, 1, 0);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!vdfLoadEntries(io, count, vdfDosTimeToEpoch(timestamp), unpkarc))

View File

@ -95,7 +95,7 @@ static void *WAD_openArchive(PHYSFS_Io *io, const char *name,
BAIL_IF_ERRPASS(!io->seek(io, directoryOffset), 0);
unpkarc = UNPK_openArchive(io);
unpkarc = UNPK_openArchive(io, 0, 1);
BAIL_IF_ERRPASS(!unpkarc, NULL);
if (!wadLoadEntries(io, count, unpkarc))

View File

@ -1482,7 +1482,7 @@ static void *ZIP_openArchive(PHYSFS_Io *io, const char *name,
if (!zip_parse_end_of_central_dir(info, &dstart, &cdir_ofs, &count))
goto ZIP_openarchive_failed;
else if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (ZIPentry)))
else if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (ZIPentry), 1, 0))
goto ZIP_openarchive_failed;
root = (ZIPentry *) info->tree.root;

View File

@ -322,7 +322,18 @@ char *__PHYSFS_strdup(const char *str);
/*
* Give a hash value for a C string (uses djb's xor hashing algorithm).
*/
PHYSFS_uint32 __PHYSFS_hashString(const char *str, size_t len);
PHYSFS_uint32 __PHYSFS_hashString(const char *str);
/*
* Give a hash value for a C string (uses djb's xor hashing algorithm), case folding as it goes.
*/
PHYSFS_uint32 __PHYSFS_hashStringCaseFold(const char *str);
/*
* Give a hash value for a C string (uses djb's xor hashing algorithm), case folding as it goes,
* assuming that this is only US-ASCII chars (one byte per char, only 'A' through 'Z' need folding).
*/
PHYSFS_uint32 __PHYSFS_hashStringCaseFoldUSAscii(const char *str);
/*
@ -358,9 +369,10 @@ int __PHYSFS_readAll(PHYSFS_Io *io, void *buf, const size_t len);
/* These are shared between some archivers. */
/* LOTS of legacy formats only use US-ASCII, not actual UTF-8, so let them optimize here by passing a non-zero only_usascii. */
void *UNPK_openArchive(PHYSFS_Io *io, const int case_sensitive, const int only_usascii);
void UNPK_abandonArchive(void *opaque);
void UNPK_closeArchive(void *opaque);
void *UNPK_openArchive(PHYSFS_Io *io);
void *UNPK_addEntry(void *opaque, char *name, const int isdir,
const PHYSFS_sint64 ctime, const PHYSFS_sint64 mtime,
const PHYSFS_uint64 pos, const PHYSFS_uint64 len);
@ -392,10 +404,13 @@ typedef struct __PHYSFS_DirTree
__PHYSFS_DirTreeEntry **hash; /* all entries hashed for fast lookup. */
size_t hashBuckets; /* number of buckets in hash. */
size_t entrylen; /* size in bytes of entries (including subclass). */
int case_sensitive; /* non-zero to treat entries as case-sensitive in DirTreeFind */
int only_usascii; /* non-zero to treat paths as US ASCII only (one byte per char, only 'A' through 'Z' are considered for case folding). */
} __PHYSFS_DirTree;
int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen);
/* LOTS of legacy formats only use US-ASCII, not actual UTF-8, so let them optimize here by passing a non-zero only_usascii. */
int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen, const int case_sensitive, const int only_usascii);
void *__PHYSFS_DirTreeAdd(__PHYSFS_DirTree *dt, char *name, const int isdir);
void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path);
PHYSFS_EnumerateCallbackResult __PHYSFS_DirTreeEnumerate(void *opaque,
@ -725,6 +740,11 @@ int __PHYSFS_platformGrabMutex(void *mutex);
*/
void __PHYSFS_platformReleaseMutex(void *mutex);
/* !!! FIXME: move to public API? */
PHYSFS_uint32 __PHYSFS_utf8codepoint(const char **_str);
#if PHYSFS_HAVE_PRAGMA_VISIBILITY
#pragma GCC visibility pop
#endif

View File

@ -21,8 +21,8 @@
/*
* This may not be the best value, but it's one that isn't represented
* in Unicode (0x10FFFF is the largest codepoint value). We return this
* value from utf8codepoint() if there's bogus bits in the
* stream. utf8codepoint() will turn this value into something
* value from __PHYSFS_utf8codepoint() if there's bogus bits in the
* stream. __PHYSFS_utf8codepoint() will turn this value into something
* reasonable (like a question mark), for text that wants to try to recover,
* whereas utf8valid() will use the value to determine if a string has bad
* bits.
@ -35,7 +35,7 @@
*/
#define UNICODE_BOGUS_CHAR_CODEPOINT '?'
static PHYSFS_uint32 utf8codepoint(const char **_str)
PHYSFS_uint32 __PHYSFS_utf8codepoint(const char **_str)
{
const char *str = *_str;
PHYSFS_uint32 retval = 0;
@ -188,6 +188,11 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
} /* else if */
return UNICODE_BOGUS_CHAR_VALUE;
} /* __PHYSFS_utf8codepoint */
/*
 * File-local short name for __PHYSFS_utf8codepoint(): passes `_str` straight
 *  through and hands back whatever codepoint value that call returns.
 * NOTE(review): presumably kept so code in this file that still uses the
 *  short name keeps working -- confirm which callers remain.
 */
static inline PHYSFS_uint32 utf8codepoint(const char **_str)
{
    const PHYSFS_uint32 codepoint = __PHYSFS_utf8codepoint(_str);
    return codepoint;
} /* utf8codepoint */
static PHYSFS_uint32 utf16codepoint(const PHYSFS_uint16 **_str)
@ -238,7 +243,7 @@ void PHYSFS_utf8ToUcs4(const char *src, PHYSFS_uint32 *dst, PHYSFS_uint64 len)
len -= sizeof (PHYSFS_uint32); /* save room for null char. */
while (len >= sizeof (PHYSFS_uint32))
{
PHYSFS_uint32 cp = utf8codepoint(&src);
PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src);
if (cp == 0)
break;
else if (cp == UNICODE_BOGUS_CHAR_VALUE)
@ -256,7 +261,7 @@ void PHYSFS_utf8ToUcs2(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
len -= sizeof (PHYSFS_uint16); /* save room for null char. */
while (len >= sizeof (PHYSFS_uint16))
{
PHYSFS_uint32 cp = utf8codepoint(&src);
PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src);
if (cp == 0)
break;
else if (cp == UNICODE_BOGUS_CHAR_VALUE)
@ -278,7 +283,7 @@ void PHYSFS_utf8ToUtf16(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
len -= sizeof (PHYSFS_uint16); /* save room for null char. */
while (len >= sizeof (PHYSFS_uint16))
{
PHYSFS_uint32 cp = utf8codepoint(&src);
PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src);
if (cp == 0)
break;
else if (cp == UNICODE_BOGUS_CHAR_VALUE)