diff -ruN lua-5.4.6/meson.build lua-5.4.6-patched/meson.build
--- lua-5.4.6/meson.build Wed Feb 22 18:16:56 2023
+++ lua-5.4.6-patched/meson.build Wed Feb 22 04:10:01 2023
@@ -85,6 +85,7 @@
   'src/lutf8lib.c',
   'src/lvm.c',
   'src/lzio.c',
+  'src/utf8_wrappers.c',
   dependencies: lua_lib_deps,
   version: meson.project_version(),
   soversion: lua_versions[0] + '.' + lua_versions[1],
diff -ruN lua-5.4.6/src/luaconf.h lua-5.4.6-patched/src/luaconf.h
--- lua-5.4.6/src/luaconf.h Thu Jan 13 19:24:43 2022
+++ lua-5.4.6-patched/src/luaconf.h Wed Feb 22 04:10:02 2023
@@ -782,5 +782,15 @@
 
 
 
+#if defined(lua_c) || defined(luac_c) || (defined(LUA_LIB) && \
+  (defined(lauxlib_c) || defined(liolib_c) || \
+   defined(loadlib_c) || defined(loslib_c)))
+#include "utf8_wrappers.h"
+#endif
+
+
+
+
+
 #endif
 
diff -ruN lua-5.4.6/src/Makefile lua-5.4.6-patched/src/Makefile
--- lua-5.4.6/src/Makefile Thu Jul 15 22:01:52 2021
+++ lua-5.4.6-patched/src/Makefile Wed Feb 22 04:10:02 2023
@@ -33,7 +33,7 @@
 PLATS= guess aix bsd c89 freebsd generic linux linux-readline macosx mingw posix solaris
 
 LUA_A= liblua.a
-CORE_O= lapi.o lcode.o lctype.o ldebug.o ldo.o ldump.o lfunc.o lgc.o llex.o lmem.o lobject.o lopcodes.o lparser.o lstate.o lstring.o ltable.o ltm.o lundump.o lvm.o lzio.o
+CORE_O= lapi.o lcode.o lctype.o ldebug.o ldo.o ldump.o lfunc.o lgc.o llex.o lmem.o lobject.o lopcodes.o lparser.o lstate.o lstring.o ltable.o ltm.o lundump.o lvm.o lzio.o utf8_wrappers.o
 LIB_O= lauxlib.o lbaselib.o lcorolib.o ldblib.o liolib.o lmathlib.o loadlib.o loslib.o lstrlib.o ltablib.o lutf8lib.o linit.o
 BASE_O= $(CORE_O) $(LIB_O) $(MYOBJS)
 
diff -ruN lua-5.4.6/src/utf8_wrappers.c lua-5.4.6-patched/src/utf8_wrappers.c
--- lua-5.4.6/src/utf8_wrappers.c Thu Jan 01 08:00:00 1970
+++ lua-5.4.6-patched/src/utf8_wrappers.c Wed Feb 22 18:13:45 2023
@@ -0,0 +1,148 @@
+/**
+ * Wrappers to provide Unicode (UTF-8) support on Windows.
+ *
+ * Copyright (c) 2018 Peter Wu
+ * SPDX-License-Identifier: (GPL-2.0-or-later OR MIT)
+ */
+
+#ifdef _WIN32
+#include <windows.h> /* for MultiByteToWideChar */
+#include <wchar.h> /* for _wrename */
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+// An environment variable has the maximum length of 32767 characters
+// including the terminator.
+#define MAX_ENV_SIZE 32767
+// Set a high limit in case long paths are enabled.
+#define MAX_PATH_SIZE 4096
+#define MAX_MODE_SIZE 128
+// cmd.exe argument length is reportedly limited to 8192.
+#define MAX_CMD_SIZE 8192
+
+// Result buffer shared by every getenv_utf8 call.
+static char env_value[MAX_ENV_SIZE];
+
+// fopen replacement taking UTF-8 arguments; fails with EINVAL if they cannot be converted.
+FILE *fopen_utf8(const char *pathname, const char *mode) {
+    wchar_t pathname_w[MAX_PATH_SIZE];
+    wchar_t mode_w[MAX_MODE_SIZE];
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE) ||
+        !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+        errno = EINVAL;
+        return NULL;
+    }
+    return _wfopen(pathname_w, mode_w);
+}
+
+// freopen replacement taking UTF-8 arguments; fails with EINVAL if they cannot be converted.
+FILE *freopen_utf8(const char *pathname, const char *mode, FILE *stream) {
+    wchar_t pathname_w[MAX_PATH_SIZE];
+    wchar_t mode_w[MAX_MODE_SIZE];
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE) ||
+        !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+        // Close stream as documented for the error case.
+        fclose(stream);
+        errno = EINVAL;
+        return NULL;
+    }
+    return _wfreopen(pathname_w, mode_w, stream);
+}
+
+// remove replacement taking a UTF-8 path; fails with EINVAL if it cannot be converted.
+int remove_utf8(const char *pathname) {
+    wchar_t pathname_w[MAX_PATH_SIZE];
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE)) {
+        errno = EINVAL;
+        return -1;
+    }
+    return _wremove(pathname_w);
+}
+
+// rename replacement taking UTF-8 paths; fails with EINVAL if either cannot be converted.
+int rename_utf8(const char *oldpath, const char *newpath) {
+    wchar_t oldpath_w[MAX_PATH_SIZE];
+    wchar_t newpath_w[MAX_PATH_SIZE];
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, oldpath, -1, oldpath_w, MAX_PATH_SIZE) ||
+        !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, newpath, -1, newpath_w, MAX_PATH_SIZE)) {
+        errno = EINVAL;
+        return -1;
+    }
+    return _wrename(oldpath_w, newpath_w);
+}
+
+// _popen replacement taking UTF-8 arguments; fails with EINVAL if they cannot be converted.
+FILE *popen_utf8(const char *command, const char *mode) {
+    wchar_t command_w[MAX_CMD_SIZE];
+    wchar_t mode_w[MAX_MODE_SIZE];
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, command, -1, command_w, MAX_CMD_SIZE) ||
+        !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+        errno = EINVAL;
+        return NULL;
+    }
+    return _wpopen(command_w, mode_w);
+}
+
+// system replacement taking a UTF-8 command; fails with EINVAL if it cannot be converted.
+int system_utf8(const char *command) {
+    wchar_t command_w[MAX_CMD_SIZE];
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, command, -1, command_w, MAX_CMD_SIZE)) {
+        errno = EINVAL;
+        return -1;
+    }
+    return _wsystem(command_w);
+}
+
+// GetModuleFileNameA replacement that yields a UTF-8 path.
+// Returns the number of bytes written excluding the terminator, or 0 on failure
+// (including a path that does not fit in either buffer).
+DWORD GetModuleFileNameA_utf8(HMODULE hModule, LPSTR lpFilename, DWORD nSize) {
+    wchar_t filename_w[MAX_PATH_SIZE];
+    DWORD len_w;
+    int len;
+
+    len_w = GetModuleFileNameW(hModule, filename_w, MAX_PATH_SIZE);
+    // A result of MAX_PATH_SIZE means the wide path was truncated.
+    if (len_w == 0 || len_w >= MAX_PATH_SIZE) {
+        return 0;
+    }
+    len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, filename_w, -1, lpFilename, (int)nSize, NULL, NULL);
+    // The converted length counts the terminator; GetModuleFileNameA's result does not.
+    return len > 0 ? (DWORD)(len - 1) : 0;
+}
+
+// LoadLibraryExA replacement taking a UTF-8 path; sets ERROR_INVALID_NAME if it cannot be converted.
+HMODULE LoadLibraryExA_utf8(LPCSTR lpLibFileName, HANDLE hFile, DWORD dwFlags) {
+    wchar_t pathname_w[MAX_PATH_SIZE];
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lpLibFileName, -1, pathname_w, MAX_PATH_SIZE)) {
+        SetLastError(ERROR_INVALID_NAME);
+        return NULL;
+    }
+    return LoadLibraryExW(pathname_w, hFile, dwFlags);
+}
+
+// getenv replacement: looks up a UTF-8 name and returns the value as UTF-8, or NULL.
+char* getenv_utf8(const char *varname) {
+    /** This implementation is not thread safe.
+     * The string is only valid until the next call to getenv.
+     * This behavior is allowed per POSIX.1-2017 where it was said that:
+     * > The returned string pointer might be invalidated or the string content might be overwritten by a subsequent call to getenv(), setenv(), unsetenv(), or (if supported) putenv() but they shall not be affected by a call to any other function in this volume of POSIX.1-2017.
+     * > The returned string pointer might also be invalidated if the calling thread is terminated.
+     * > The getenv() function need not be thread-safe.
+     */
+    wchar_t *value_w;
+    wchar_t varname_w[MAX_ENV_SIZE];
+
+    if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, varname, -1, varname_w, MAX_ENV_SIZE))
+        return NULL;
+    value_w = _wgetenv((const wchar_t *) varname_w);
+    if (!value_w)
+        return NULL;
+
+    if (!WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, value_w, -1, env_value, MAX_ENV_SIZE, NULL, NULL))
+        return NULL;
+
+    return env_value;
+}
+#endif
diff -ruN lua-5.4.6/src/utf8_wrappers.h lua-5.4.6-patched/src/utf8_wrappers.h
--- lua-5.4.6/src/utf8_wrappers.h Thu Jan 01 08:00:00 1970
+++ lua-5.4.6-patched/src/utf8_wrappers.h Wed Feb 22 18:09:48 2023
@@ -0,0 +1,46 @@
+/**
+ * Wrappers to provide Unicode (UTF-8) support on Windows.
+ *
+ * Copyright (c) 2018 Peter Wu
+ * SPDX-License-Identifier: (GPL-2.0-or-later OR MIT)
+ */
+
+#ifdef _WIN32
+
+#if defined(loadlib_c) || defined(lauxlib_c) || defined(liolib_c) || defined(luac_c)
+#include <stdio.h> /* for loadlib_c */
+FILE *fopen_utf8(const char *pathname, const char *mode);
+#define fopen fopen_utf8
+#endif
+
+#ifdef lauxlib_c
+#include <stdio.h>
+FILE *freopen_utf8(const char *pathname, const char *mode, FILE *stream);
+#define freopen freopen_utf8
+#endif
+
+#ifdef liolib_c
+FILE *popen_utf8(const char *command, const char *mode);
+#define _popen popen_utf8
+#endif
+
+#ifdef loslib_c
+#include <stdio.h>
+int remove_utf8(const char *pathname);
+int rename_utf8(const char *oldpath, const char *newpath);
+int system_utf8(const char *command);
+char *getenv_utf8(const char *varname);
+#define remove remove_utf8
+#define rename rename_utf8
+#define system system_utf8
+#define getenv getenv_utf8
+#endif
+
+#ifdef loadlib_c
+#include <windows.h>
+DWORD GetModuleFileNameA_utf8(HMODULE hModule, LPSTR lpFilename, DWORD nSize);
+HMODULE LoadLibraryExA_utf8(LPCSTR lpLibFileName, HANDLE hFile, DWORD dwFlags);
+#define GetModuleFileNameA GetModuleFileNameA_utf8
+#define LoadLibraryExA LoadLibraryExA_utf8
+#endif
+#endif