utf8: big improvements to case-insensitive UTF-8 string compare.
- Dramatically reduce RAM usage: uses between 8 and 11 kilobytes less static memory for its internal case-folding tables. - Actually works now. It would fail unconditionally if a codepoint folded into multiple codepoints, even if the compared string contained those exact codepoints. - Now a public API! - Removed __PHYSFS_utf8strnicmp(): nothing was using it, it was incorrect anyhow, and what does 'n' represent when either string might case-fold to something larger in-flight, anyhow?
This commit is contained in:
parent
587ec88a0d
commit
d1f2637ca8
|
@ -3,6 +3,11 @@
|
|||
use warnings;
|
||||
use strict;
|
||||
|
||||
my $HASHBUCKETS1_16 = 256;
|
||||
my $HASHBUCKETS1_32 = 16;
|
||||
my $HASHBUCKETS2_16 = 16;
|
||||
my $HASHBUCKETS3_16 = 4;
|
||||
|
||||
print <<__EOF__;
|
||||
/*
|
||||
* This file is part of PhysicsFS (https://icculus.org/physfs/)
|
||||
|
@ -13,17 +18,97 @@ print <<__EOF__;
|
|||
* Please see the file LICENSE.txt in the source's root directory.
|
||||
*/
|
||||
|
||||
#ifndef _INCLUDE_PHYSFS_CASEFOLDING_H_
|
||||
#define _INCLUDE_PHYSFS_CASEFOLDING_H_
|
||||
|
||||
#ifndef __PHYSICSFS_INTERNAL__
|
||||
#error Do not include this header from your applications.
|
||||
#endif
|
||||
|
||||
/* We build three simple hashmaps here: one that maps Unicode codepoints to
|
||||
a one, two, or three lowercase codepoints. To retrieve this info: look at
|
||||
case_fold_hashX, where X is 1, 2, or 3. Most foldable codepoints fold to one,
|
||||
a few dozen fold to two, and a handful fold to three. If the codepoint isn't
|
||||
in any of these hashes, it doesn't fold (no separate upper and lowercase).
|
||||
|
||||
Almost all these codepoints fit into 16 bits, so we hash them as such to save
|
||||
memory. If a codepoint is > 0xFFFF, we have separate hashes for them,
|
||||
since there are (currently) only about 120 of them and (currently) all of them
|
||||
map to a single lowercase codepoint. */
|
||||
|
||||
typedef struct CaseFoldMapping1_32
|
||||
{
|
||||
PHYSFS_uint32 from;
|
||||
PHYSFS_uint32 to0;
|
||||
} CaseFoldMapping1_32;
|
||||
|
||||
typedef struct CaseFoldMapping1_16
|
||||
{
|
||||
PHYSFS_uint16 from;
|
||||
PHYSFS_uint16 to0;
|
||||
} CaseFoldMapping1_16;
|
||||
|
||||
typedef struct CaseFoldMapping2_16
|
||||
{
|
||||
PHYSFS_uint16 from;
|
||||
PHYSFS_uint16 to0;
|
||||
PHYSFS_uint16 to1;
|
||||
} CaseFoldMapping2_16;
|
||||
|
||||
typedef struct CaseFoldMapping3_16
|
||||
{
|
||||
PHYSFS_uint16 from;
|
||||
PHYSFS_uint16 to0;
|
||||
PHYSFS_uint16 to1;
|
||||
PHYSFS_uint16 to2;
|
||||
} CaseFoldMapping3_16;
|
||||
|
||||
typedef struct CaseFoldHashBucket1_16
|
||||
{
|
||||
const CaseFoldMapping1_16 *list;
|
||||
const PHYSFS_uint8 count;
|
||||
} CaseFoldHashBucket1_16;
|
||||
|
||||
typedef struct CaseFoldHashBucket1_32
|
||||
{
|
||||
const CaseFoldMapping1_32 *list;
|
||||
const PHYSFS_uint8 count;
|
||||
} CaseFoldHashBucket1_32;
|
||||
|
||||
typedef struct CaseFoldHashBucket2_16
|
||||
{
|
||||
const CaseFoldMapping2_16 *list;
|
||||
const PHYSFS_uint8 count;
|
||||
} CaseFoldHashBucket2_16;
|
||||
|
||||
typedef struct CaseFoldHashBucket3_16
|
||||
{
|
||||
const CaseFoldMapping3_16 *list;
|
||||
const PHYSFS_uint8 count;
|
||||
} CaseFoldHashBucket3_16;
|
||||
|
||||
__EOF__
|
||||
|
||||
|
||||
my @foldPairs;
|
||||
my @foldPairs1_16;
|
||||
my @foldPairs2_16;
|
||||
my @foldPairs3_16;
|
||||
my @foldPairs1_32;
|
||||
|
||||
for (my $i = 0; $i < 256; $i++) {
|
||||
$foldPairs[$i] = '';
|
||||
for (my $i = 0; $i < $HASHBUCKETS1_16; $i++) {
|
||||
$foldPairs1_16[$i] = '';
|
||||
}
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS1_32; $i++) {
|
||||
$foldPairs1_32[$i] = '';
|
||||
}
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS2_16; $i++) {
|
||||
$foldPairs2_16[$i] = '';
|
||||
}
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS3_16; $i++) {
|
||||
$foldPairs3_16[$i] = '';
|
||||
}
|
||||
|
||||
open(FH,'<','casefolding.txt') or die("failed to open casefolding.txt: $!\n");
|
||||
|
@ -38,47 +123,153 @@ while (<FH>) {
|
|||
|
||||
next if not /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
|
||||
my ($code, $status, $mapping) = ($1, $2, $3);
|
||||
|
||||
my $hexxed = hex($code);
|
||||
my $hashed = (($hexxed ^ ($hexxed >> 8)) & 0xFF);
|
||||
#print("// code '$code' status '$status' mapping '$mapping'\n");
|
||||
#print("// hexxed '$hexxed' hashed '$hashed'\n");
|
||||
|
||||
if (($status eq 'C') or ($status eq 'F')) {
|
||||
my ($map1, $map2, $map3) = ('0000', '0000', '0000');
|
||||
my ($map1, $map2, $map3) = (undef, undef, undef);
|
||||
$map1 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
|
||||
$map2 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
|
||||
$map3 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
|
||||
die("mapping space too small for '$code'\n") if ($mapping ne '');
|
||||
$foldPairs[$hashed] .= " { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
|
||||
die("problem parsing mapping for '$code'\n") if (not defined($map1));
|
||||
|
||||
if ($hexxed < 128) {
|
||||
# Just ignore these, we'll handle the low-ASCII ones ourselves.
|
||||
} elsif ($hexxed > 0xFFFF) {
|
||||
# We just need to add the 32-bit 2 and/or 3 codepoint maps if this die()'s here.
|
||||
die("Uhoh, a codepoint > 0xFFFF that folds to multiple codepoints! Fixme.") if defined($map2);
|
||||
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS1_32-1));
|
||||
#print("// hexxed '$hexxed' hashed1 '$hashed'\n");
|
||||
$foldPairs1_32[$hashed] .= " { 0x$code, 0x$map1 },\n";
|
||||
} elsif (not defined($map2)) {
|
||||
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS1_16-1));
|
||||
#print("// hexxed '$hexxed' hashed1 '$hashed'\n");
|
||||
$foldPairs1_16[$hashed] .= " { 0x$code, 0x$map1 },\n";
|
||||
} elsif (not defined($map3)) {
|
||||
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS2_16-1));
|
||||
#print("// hexxed '$hexxed' hashed2 '$hashed'\n");
|
||||
$foldPairs2_16[$hashed] .= " { 0x$code, 0x$map1, 0x$map2 },\n";
|
||||
} else {
|
||||
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS3_16-1));
|
||||
#print("// hexxed '$hexxed' hashed3 '$hashed'\n");
|
||||
$foldPairs3_16[$hashed] .= " { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
close(FH);
|
||||
|
||||
for (my $i = 0; $i < 256; $i++) {
|
||||
$foldPairs[$i] =~ s/,\n\Z//;
|
||||
my $str = $foldPairs[$i];
|
||||
for (my $i = 0; $i < $HASHBUCKETS1_16; $i++) {
|
||||
$foldPairs1_16[$i] =~ s/,\n\Z//;
|
||||
my $str = $foldPairs1_16[$i];
|
||||
next if $str eq '';
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold_${num}";
|
||||
print("static const CaseFoldMapping ${sym}[] = {\n$str\n};\n\n");
|
||||
my $sym = "case_fold1_16_${num}";
|
||||
print("static const CaseFoldMapping1_16 ${sym}[] = {\n$str\n};\n\n");
|
||||
}
|
||||
|
||||
print("\nstatic const CaseFoldHashBucket case_fold_hash[256] = {\n");
|
||||
for (my $i = 0; $i < $HASHBUCKETS1_32; $i++) {
|
||||
$foldPairs1_32[$i] =~ s/,\n\Z//;
|
||||
my $str = $foldPairs1_32[$i];
|
||||
next if $str eq '';
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold1_32_${num}";
|
||||
print("static const CaseFoldMapping1_32 ${sym}[] = {\n$str\n};\n\n");
|
||||
}
|
||||
|
||||
for (my $i = 0; $i < 256; $i++) {
|
||||
my $str = $foldPairs[$i];
|
||||
for (my $i = 0; $i < $HASHBUCKETS2_16; $i++) {
|
||||
$foldPairs2_16[$i] =~ s/,\n\Z//;
|
||||
my $str = $foldPairs2_16[$i];
|
||||
next if $str eq '';
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold2_16_${num}";
|
||||
print("static const CaseFoldMapping2_16 ${sym}[] = {\n$str\n};\n\n");
|
||||
}
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS3_16; $i++) {
|
||||
$foldPairs3_16[$i] =~ s/,\n\Z//;
|
||||
my $str = $foldPairs3_16[$i];
|
||||
next if $str eq '';
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold3_16_${num}";
|
||||
print("static const CaseFoldMapping3_16 ${sym}[] = {\n$str\n};\n\n");
|
||||
}
|
||||
|
||||
print("static const CaseFoldHashBucket1_16 case_fold_hash1_16[] = {\n");
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS1_16; $i++) {
|
||||
my $str = $foldPairs1_16[$i];
|
||||
if ($str eq '') {
|
||||
print(" { 0, NULL },\n");
|
||||
print(" { NULL, 0 },\n");
|
||||
} else {
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold_${num}";
|
||||
print(" { __PHYSFS_ARRAYLEN($sym), $sym },\n");
|
||||
my $sym = "case_fold1_16_${num}";
|
||||
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
|
||||
}
|
||||
}
|
||||
print("};\n\n");
|
||||
|
||||
|
||||
print("static const CaseFoldHashBucket1_32 case_fold_hash1_32[] = {\n");
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS1_32; $i++) {
|
||||
my $str = $foldPairs1_32[$i];
|
||||
if ($str eq '') {
|
||||
print(" { NULL, 0 },\n");
|
||||
} else {
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold1_32_${num}";
|
||||
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
|
||||
}
|
||||
}
|
||||
print("};\n\n");
|
||||
|
||||
|
||||
print("static const CaseFoldHashBucket2_16 case_fold_hash2_16[] = {\n");
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS2_16; $i++) {
|
||||
my $str = $foldPairs2_16[$i];
|
||||
if ($str eq '') {
|
||||
print(" { NULL, 0 },\n");
|
||||
} else {
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold2_16_${num}";
|
||||
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
|
||||
}
|
||||
}
|
||||
print("};\n\n");
|
||||
|
||||
print("static const CaseFoldHashBucket3_16 case_fold_hash3_16[] = {\n");
|
||||
|
||||
for (my $i = 0; $i < $HASHBUCKETS3_16; $i++) {
|
||||
my $str = $foldPairs3_16[$i];
|
||||
if ($str eq '') {
|
||||
print(" { NULL, 0 },\n");
|
||||
} else {
|
||||
my $num = '000' . $i;
|
||||
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
|
||||
my $sym = "case_fold3_16_${num}";
|
||||
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
|
||||
}
|
||||
}
|
||||
print("};\n\n");
|
||||
|
||||
print <<__EOF__;
|
||||
|
||||
#endif /* _INCLUDE_PHYSFS_CASEFOLDING_H_ */
|
||||
|
||||
/* end of physfs_casefolding.h ... */
|
||||
|
||||
__EOF__
|
||||
|
||||
exit 0;
|
||||
|
||||
# end of makecashfoldhashtable.pl ...
|
||||
|
|
10
src/physfs.c
10
src/physfs.c
|
@ -891,14 +891,14 @@ static DirHandle *openDirectory(PHYSFS_Io *io, const char *d, int forWriting)
|
|||
/* Look for archivers with matching file extensions first... */
|
||||
for (i = archivers; (*i != NULL) && (retval == NULL); i++)
|
||||
{
|
||||
if (__PHYSFS_utf8stricmp(ext, (*i)->info.extension) == 0)
|
||||
if (PHYSFS_utf8stricmp(ext, (*i)->info.extension) == 0)
|
||||
retval = tryOpenDir(io, *i, d, forWriting);
|
||||
} /* for */
|
||||
|
||||
/* failing an exact file extension match, try all the others... */
|
||||
for (i = archivers; (*i != NULL) && (retval == NULL); i++)
|
||||
{
|
||||
if (__PHYSFS_utf8stricmp(ext, (*i)->info.extension) != 0)
|
||||
if (PHYSFS_utf8stricmp(ext, (*i)->info.extension) != 0)
|
||||
retval = tryOpenDir(io, *i, d, forWriting);
|
||||
} /* for */
|
||||
} /* if */
|
||||
|
@ -1442,7 +1442,7 @@ static int doRegisterArchiver(const PHYSFS_Archiver *_archiver)
|
|||
ext = _archiver->info.extension;
|
||||
for (i = 0; i < numArchivers; i++)
|
||||
{
|
||||
if (__PHYSFS_utf8stricmp(archiveInfo[i]->extension, ext) == 0)
|
||||
if (PHYSFS_utf8stricmp(archiveInfo[i]->extension, ext) == 0)
|
||||
BAIL(PHYSFS_ERR_DUPLICATE, 0);
|
||||
} /* for */
|
||||
|
||||
|
@ -1518,7 +1518,7 @@ int PHYSFS_deregisterArchiver(const char *ext)
|
|||
__PHYSFS_platformGrabMutex(stateLock);
|
||||
for (i = 0; i < numArchivers; i++)
|
||||
{
|
||||
if (__PHYSFS_utf8stricmp(archiveInfo[i]->extension, ext) == 0)
|
||||
if (PHYSFS_utf8stricmp(archiveInfo[i]->extension, ext) == 0)
|
||||
{
|
||||
const int retval = doDeregisterArchiver(i);
|
||||
__PHYSFS_platformReleaseMutex(stateLock);
|
||||
|
@ -1921,7 +1921,7 @@ int PHYSFS_setSaneConfig(const char *organization, const char *appName,
|
|||
if ((l > extlen) && ((*i)[l - extlen - 1] == '.'))
|
||||
{
|
||||
ext = (*i) + (l - extlen);
|
||||
if (__PHYSFS_utf8stricmp(ext, archiveExt) == 0)
|
||||
if (PHYSFS_utf8stricmp(ext, archiveExt) == 0)
|
||||
setSaneCfgAddPath(*i, l, dirsep, archivesFirst);
|
||||
} /* if */
|
||||
} /* for */
|
||||
|
|
24
src/physfs.h
24
src/physfs.h
|
@ -2521,6 +2521,30 @@ PHYSFS_DECL void PHYSFS_utf8FromLatin1(const char *src, char *dst,
|
|||
|
||||
/* Everything above this line is part of the PhysicsFS 2.0 API. */
|
||||
|
||||
/**
|
||||
* \fn int PHYSFS_utf8stricmp(const char *str1, const char *str2)
|
||||
* \brief Case-insensitive compare of two UTF-8 strings.
|
||||
*
|
||||
* This is a strcasecmp/stricmp replacement that expects both strings
|
||||
* to be in UTF-8 encoding. It will do "case folding" to decide if the
|
||||
* Unicode codepoints in the strings match.
|
||||
*
|
||||
* It will report which string is "greater than" the other, but be aware that
|
||||
* this doesn't necessarily mean anything: 'a' may be "less than" 'b', but
|
||||
* a Japanese kuten has no meaningful alphabetically relationship to
|
||||
* a Greek lambda, but being able to assign a reliable "value" makes sorting
|
||||
* algorithms possible, if not entirely sane. Most cases should treat the
|
||||
* return value as "equal" or "not equal".
|
||||
*
|
||||
* Like stricmp, this expects both strings to be NULL-terminated.
|
||||
*
|
||||
* \param str1 First string to compare.
|
||||
* \param str2 Second string to compare.
|
||||
* \return -1 if str1 is "less than" str2, 1 if "greater than", 0 if equal.
|
||||
*/
|
||||
PHYSFS_DECL int PHYSFS_utf8stricmp(const char *str1, const char *str2);
|
||||
|
||||
|
||||
/**
|
||||
* \fn int PHYSFS_unmount(const char *oldDir)
|
||||
* \brief Remove a directory or archive from the search path.
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -290,27 +290,6 @@ void __PHYSFS_sort(void *entries, size_t max,
|
|||
((s) < (__PHYSFS_UI64(0xFFFFFFFFFFFFFFFF) >> (64-(sizeof(size_t)*8)))) \
|
||||
)
|
||||
|
||||
|
||||
/*
|
||||
* This is a strcasecmp() or stricmp() replacement that expects both strings
|
||||
* to be in UTF-8 encoding. It will do "case folding" to decide if the
|
||||
* Unicode codepoints in the strings match.
|
||||
*
|
||||
* It will report which string is "greater than" the other, but be aware that
|
||||
* this doesn't necessarily mean anything: 'a' may be "less than" 'b', but
|
||||
* a random Kanji codepoint has no meaningful alphabetically relationship to
|
||||
* a Greek Lambda, but being able to assign a reliable "value" makes sorting
|
||||
* algorithms possible, if not entirely sane. Most cases should treat the
|
||||
* return value as "equal" or "not equal".
|
||||
*/
|
||||
int __PHYSFS_utf8stricmp(const char *s1, const char *s2);
|
||||
|
||||
/*
|
||||
* This works like __PHYSFS_utf8stricmp(), but takes a character (NOT BYTE
|
||||
* COUNT) argument, like strcasencmp().
|
||||
*/
|
||||
int __PHYSFS_utf8strnicmp(const char *s1, const char *s2, PHYSFS_uint32 l);
|
||||
|
||||
/*
|
||||
* stricmp() that guarantees to only work with low ASCII. The C runtime
|
||||
* stricmp() might try to apply a locale/codepage/etc, which we don't want.
|
||||
|
|
|
@ -206,7 +206,7 @@ static char *cvtPathToCorrectCase(char *buf)
|
|||
cmp = __PHYSFS_stricmpASCII(utf8, fname);
|
||||
else
|
||||
{
|
||||
cmp = __PHYSFS_utf8stricmp(utf8, fname);
|
||||
cmp = PHYSFS_utf8stricmp(utf8, fname);
|
||||
allocator.Free(utf8);
|
||||
} /* else */
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#define __PHYSICSFS_INTERNAL__
|
||||
#include "physfs_internal.h"
|
||||
|
||||
#include "physfs_casefolding.h"
|
||||
|
||||
|
||||
/*
|
||||
* From rfc3629, the UTF-8 spec:
|
||||
|
@ -402,112 +404,134 @@ void PHYSFS_utf8FromUtf16(const PHYSFS_uint16 *src, char *dst, PHYSFS_uint64 len
|
|||
} /* PHYSFS_utf8FromUtf16 */
|
||||
|
||||
|
||||
typedef struct CaseFoldMapping
|
||||
/* (to) should point to at least 3 PHYSFS_uint32 slots. */
|
||||
static int locate_casefold_mapping(const PHYSFS_uint32 from, PHYSFS_uint32 *to)
|
||||
{
|
||||
PHYSFS_uint32 from;
|
||||
PHYSFS_uint32 to0;
|
||||
PHYSFS_uint32 to1;
|
||||
PHYSFS_uint32 to2;
|
||||
} CaseFoldMapping;
|
||||
int i;
|
||||
|
||||
typedef struct CaseFoldHashBucket
|
||||
{
|
||||
const PHYSFS_uint8 count;
|
||||
const CaseFoldMapping *list;
|
||||
} CaseFoldHashBucket;
|
||||
|
||||
#include "physfs_casefolding.h"
|
||||
|
||||
static void locate_case_fold_mapping(const PHYSFS_uint32 from,
|
||||
PHYSFS_uint32 *to)
|
||||
{
|
||||
PHYSFS_uint32 i;
|
||||
const PHYSFS_uint8 hashed = ((from ^ (from >> 8)) & 0xFF);
|
||||
const CaseFoldHashBucket *bucket = &case_fold_hash[hashed];
|
||||
const CaseFoldMapping *mapping = bucket->list;
|
||||
|
||||
for (i = 0; i < bucket->count; i++, mapping++)
|
||||
if (from < 128) /* low-ASCII, easy! */
|
||||
{
|
||||
if (mapping->from == from)
|
||||
if ((from >= 'A') && (from <= 'Z'))
|
||||
*to = from - ('A' - 'a');
|
||||
else
|
||||
*to = from;
|
||||
return 1;
|
||||
} /* if */
|
||||
|
||||
else if (from <= 0xFFFF)
|
||||
{
|
||||
const PHYSFS_uint8 hash = ((from ^ (from >> 8)) & 0xFF);
|
||||
const PHYSFS_uint16 from16 = (PHYSFS_uint16) from;
|
||||
|
||||
{
|
||||
to[0] = mapping->to0;
|
||||
to[1] = mapping->to1;
|
||||
to[2] = mapping->to2;
|
||||
return;
|
||||
} /* if */
|
||||
} /* for */
|
||||
const CaseFoldHashBucket1_16 *bucket = &case_fold_hash1_16[hash];
|
||||
const int count = (int) bucket->count;
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
const CaseFoldMapping1_16 *mapping = &bucket->list[i];
|
||||
if (mapping->from == from16)
|
||||
{
|
||||
*to = mapping->to0;
|
||||
return 1;
|
||||
} /* if */
|
||||
} /* for */
|
||||
}
|
||||
|
||||
{
|
||||
const CaseFoldHashBucket2_16 *bucket = &case_fold_hash2_16[hash & 15];
|
||||
const int count = (int) bucket->count;
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
const CaseFoldMapping2_16 *mapping = &bucket->list[i];
|
||||
if (mapping->from == from16)
|
||||
{
|
||||
to[0] = mapping->to0;
|
||||
to[1] = mapping->to1;
|
||||
return 2;
|
||||
} /* if */
|
||||
} /* for */
|
||||
}
|
||||
|
||||
{
|
||||
const CaseFoldHashBucket3_16 *bucket = &case_fold_hash3_16[hash & 3];
|
||||
const int count = (int) bucket->count;
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
const CaseFoldMapping3_16 *mapping = &bucket->list[i];
|
||||
if (mapping->from == from16)
|
||||
{
|
||||
to[0] = mapping->to0;
|
||||
to[1] = mapping->to1;
|
||||
to[2] = mapping->to2;
|
||||
return 3;
|
||||
} /* if */
|
||||
} /* for */
|
||||
}
|
||||
} /* else if */
|
||||
|
||||
else /* codepoint that doesn't fit in 16 bits. */
|
||||
{
|
||||
const PHYSFS_uint8 hash = ((from ^ (from >> 8)) & 0xFF);
|
||||
const CaseFoldHashBucket1_32 *bucket = &case_fold_hash1_32[hash & 15];
|
||||
const int count = (int) bucket->count;
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
const CaseFoldMapping1_32 *mapping = &bucket->list[i];
|
||||
if (mapping->from == from)
|
||||
{
|
||||
*to = mapping->to0;
|
||||
return 1;
|
||||
} /* if */
|
||||
} /* for */
|
||||
} /* else */
|
||||
|
||||
|
||||
/* Not found...there's no remapping for this codepoint. */
|
||||
to[0] = from;
|
||||
to[1] = 0;
|
||||
to[2] = 0;
|
||||
} /* locate_case_fold_mapping */
|
||||
*to = from;
|
||||
return 1;
|
||||
} /* locate_casefold_mapping */
|
||||
|
||||
|
||||
/* !!! FIXME-3.0: this doesn't actually work (for example, it folds the German Eszett
|
||||
into 's' 's', but if you have two 'S' chars in a row, it'll fail on the first one,
|
||||
since it'll fold into a single 's'. This needs to be able to lurch along with a
|
||||
variable number of codepoints at a time. */
|
||||
static int utf8codepointcmp(const PHYSFS_uint32 cp1, const PHYSFS_uint32 cp2)
|
||||
int PHYSFS_utf8stricmp(const char *str1, const char *str2)
|
||||
{
|
||||
PHYSFS_uint32 folded1[3], folded2[3];
|
||||
int head1 = 0;
|
||||
int tail1 = 0;
|
||||
int head2 = 0;
|
||||
int tail2 = 0;
|
||||
|
||||
if (cp1 == cp2)
|
||||
return 0; /* obviously matches. */
|
||||
|
||||
locate_case_fold_mapping(cp1, folded1);
|
||||
locate_case_fold_mapping(cp2, folded2);
|
||||
|
||||
if (folded1[0] < folded2[0])
|
||||
return -1;
|
||||
else if (folded1[0] > folded2[0])
|
||||
return 1;
|
||||
else if (folded1[1] < folded2[1])
|
||||
return -1;
|
||||
else if (folded1[1] > folded2[1])
|
||||
return 1;
|
||||
else if (folded1[2] < folded2[2])
|
||||
return -1;
|
||||
else if (folded1[2] > folded2[2])
|
||||
return 1;
|
||||
|
||||
return 0; /* complete match. */
|
||||
} /* utf8codepointcmp */
|
||||
|
||||
|
||||
int __PHYSFS_utf8stricmp(const char *str1, const char *str2)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
const PHYSFS_uint32 cp1 = utf8codepoint(&str1);
|
||||
const PHYSFS_uint32 cp2 = utf8codepoint(&str2);
|
||||
const int rc = utf8codepointcmp(cp1, cp2);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
PHYSFS_uint32 cp1, cp2;
|
||||
|
||||
if (head1 != tail1)
|
||||
cp1 = folded1[tail1++];
|
||||
else
|
||||
{
|
||||
head1 = locate_casefold_mapping(utf8codepoint(&str1), folded1);
|
||||
cp1 = folded1[0];
|
||||
tail1 = 1;
|
||||
} /* else */
|
||||
|
||||
if (head2 != tail2)
|
||||
cp2 = folded2[tail2++];
|
||||
else
|
||||
{
|
||||
head2 = locate_casefold_mapping(utf8codepoint(&str2), folded2);
|
||||
cp2 = folded2[0];
|
||||
tail2 = 1;
|
||||
} /* else */
|
||||
|
||||
if (cp1 < cp2)
|
||||
return -1;
|
||||
else if (cp1 > cp2)
|
||||
return 1;
|
||||
else if (cp1 == 0)
|
||||
break; /* complete match. */
|
||||
break; /* complete match. */
|
||||
} /* while */
|
||||
|
||||
return 0;
|
||||
} /* __PHYSFS_utf8stricmp */
|
||||
|
||||
|
||||
int __PHYSFS_utf8strnicmp(const char *str1, const char *str2, PHYSFS_uint32 n)
|
||||
{
|
||||
while (n > 0)
|
||||
{
|
||||
const PHYSFS_uint32 cp1 = utf8codepoint(&str1);
|
||||
const PHYSFS_uint32 cp2 = utf8codepoint(&str2);
|
||||
const int rc = utf8codepointcmp(cp1, cp2);
|
||||
if (rc != 0)
|
||||
return rc;
|
||||
else if (cp1 == 0)
|
||||
return 0;
|
||||
n--;
|
||||
} /* while */
|
||||
|
||||
return 0; /* matched to n chars. */
|
||||
} /* __PHYSFS_utf8strnicmp */
|
||||
} /* PHYSFS_utf8stricmp */
|
||||
|
||||
|
||||
int __PHYSFS_stricmpASCII(const char *str1, const char *str2)
|
||||
|
|
Loading…
Reference in New Issue