2024-11-29 08:21:08 +01:00
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Jan200101 <sentrycraft123@gmail.com>
Date: Fri, 29 Nov 2024 08:19:27 +0100
Subject: [PATCH] add utf8 wrapper
Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
meson.build | 1 +
src/Makefile | 2 +-
src/luaconf.h | 10 ++++
src/utf8_wrappers.c | 129 ++++++++++++++++++++++++++++++++++++++++++++
src/utf8_wrappers.h | 46 ++++++++++++++++
5 files changed, 187 insertions(+), 1 deletion(-)
create mode 100644 src/utf8_wrappers.c
create mode 100644 src/utf8_wrappers.h
diff --git a/meson.build b/meson.build
index ffd115c..31efb31 100644
--- a/meson.build
+++ b/meson.build
@@ -82,6 +82,7 @@ lua_lib = library(
2022-06-11 15:01:12 +02:00
'src/lutf8lib.c',
'src/lvm.c',
'src/lzio.c',
+ 'src/utf8_wrappers.c',
dependencies: lua_lib_deps,
2022-11-16 04:23:45 +01:00
version: meson.project_version(),
soversion: lua_versions[0] + '.' + lua_versions[1],
2024-11-29 08:21:08 +01:00
diff --git a/src/Makefile b/src/Makefile
index b771196..6d3ff24 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -33,7 +33,7 @@ CMCFLAGS=
PLATS= guess aix bsd c89 freebsd generic ios linux linux-readline macosx mingw posix solaris
LUA_A= liblua.a
-CORE_O= lapi.o lcode.o lctype.o ldebug.o ldo.o ldump.o lfunc.o lgc.o llex.o lmem.o lobject.o lopcodes.o lparser.o lstate.o lstring.o ltable.o ltm.o lundump.o lvm.o lzio.o
+CORE_O= lapi.o lcode.o lctype.o ldebug.o ldo.o ldump.o lfunc.o lgc.o llex.o lmem.o lobject.o lopcodes.o lparser.o lstate.o lstring.o ltable.o ltm.o lundump.o lvm.o lzio.o utf8_wrappers.o
LIB_O= lauxlib.o lbaselib.o lcorolib.o ldblib.o liolib.o lmathlib.o loadlib.o loslib.o lstrlib.o ltablib.o lutf8lib.o linit.o
BASE_O= $(CORE_O) $(LIB_O) $(MYOBJS)
diff --git a/src/luaconf.h b/src/luaconf.h
index 137103e..3cd0d53 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -789,5 +789,15 @@
2022-06-11 15:01:12 +02:00
+#if defined(lua_c) || defined(luac_c) || (defined(LUA_LIB) && \
+ (defined(lauxlib_c) || defined(liolib_c) || \
+ defined(loadlib_c) || defined(loslib_c)))
+#include "utf8_wrappers.h"
+#endif
+
+
+
+
+
#endif
2024-11-29 08:21:08 +01:00
diff --git a/src/utf8_wrappers.c b/src/utf8_wrappers.c
new file mode 100644
index 0000000..5b9b1f6
--- /dev/null
+++ b/src/utf8_wrappers.c
2023-04-07 19:11:01 +02:00
@@ -0,0 +1,129 @@
2022-06-11 15:01:12 +02:00
+/**
+ * Wrappers to provide Unicode (UTF-8) support on Windows.
+ *
+ * Copyright (c) 2018 Peter Wu <peter@lekensteyn.nl>
+ * SPDX-License-Identifier: (GPL-2.0-or-later OR MIT)
+ */
+
+#ifdef _WIN32
+#include <windows.h> /* for MultiByteToWideChar */
+#include <wchar.h> /* for _wrename */
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
2023-04-07 19:11:01 +02:00
+// A environment variable has the maximum length of 32767 characters
+// including the terminator.
+#define MAX_ENV_SIZE 32767
2022-06-11 15:01:12 +02:00
+// Set a high limit in case long paths are enabled.
+#define MAX_PATH_SIZE 4096
+#define MAX_MODE_SIZE 128
+// cmd.exe argument length is reportedly limited to 8192.
+#define MAX_CMD_SIZE 8192
+
2023-04-07 19:11:01 +02:00
+static char env_value[MAX_ENV_SIZE];
+
2022-06-11 15:01:12 +02:00
+FILE *fopen_utf8(const char *pathname, const char *mode) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ wchar_t mode_w[MAX_MODE_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return _wfopen(pathname_w, mode_w);
+}
+
+FILE *freopen_utf8(const char *pathname, const char *mode, FILE *stream) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ wchar_t mode_w[MAX_MODE_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+ // Close stream as documented for the error case.
+ fclose(stream);
+ errno = EINVAL;
+ return NULL;
+ }
+ return _wfreopen(pathname_w, mode_w, stream);
+}
+
+int remove_utf8(const char *pathname) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return _wremove(pathname_w);
+}
+
+int rename_utf8(const char *oldpath, const char *newpath) {
+ wchar_t oldpath_w[MAX_PATH_SIZE];
+ wchar_t newpath_w[MAX_PATH_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, oldpath, -1, oldpath_w, MAX_PATH_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, newpath, -1, newpath_w, MAX_PATH_SIZE)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return _wrename(oldpath_w, newpath_w);
+}
+
+FILE *popen_utf8(const char *command, const char *mode) {
+ wchar_t command_w[MAX_CMD_SIZE];
+ wchar_t mode_w[MAX_MODE_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, command, -1, command_w, MAX_CMD_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return _wpopen(command_w, mode_w);
+}
+
+int system_utf8(const char *command) {
+ wchar_t command_w[MAX_CMD_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, command, -1, command_w, MAX_CMD_SIZE)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return _wsystem(command_w);
+}
+
+DWORD GetModuleFileNameA_utf8(HMODULE hModule, LPSTR lpFilename, DWORD nSize) {
+ wchar_t filename_w[MAX_PATH + 1];
+ if (!GetModuleFileNameW(hModule, filename_w, MAX_PATH + 1)) {
+ return 0;
+ }
+ return WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, filename_w, -1, lpFilename, nSize, NULL, NULL);
+}
+
+HMODULE LoadLibraryExA_utf8(LPCSTR lpLibFileName, HANDLE hFile, DWORD dwFlags) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lpLibFileName, -1, pathname_w, MAX_PATH_SIZE)) {
+ SetLastError(ERROR_INVALID_NAME);
+ return NULL;
+ }
+ return LoadLibraryExW(pathname_w, hFile, dwFlags);
+}
2023-04-07 19:11:01 +02:00
+
+char* getenv_utf8(const char *varname) {
+ /** This implementation is not thread safe.
+ * The string is only valid until the next call to getenv.
+ * This behavior is allowed per POSIX.1-2017 where it was said that:
+ * > The returned string pointer might be invalidated or the string content might be overwritten by a subsequent call to getenv(), setenv(), unsetenv(), or (if supported) putenv() but they shall not be affected by a call to any other function in this volume of POSIX.1-2017.
+ * > The returned string pointer might also be invalidated if the calling thread is terminated.
+ * > The getenv() function need not be thread-safe.
+ */
+ wchar_t *value_w;
+ wchar_t varname_w[MAX_ENV_SIZE];
+
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, varname, -1, varname_w, MAX_ENV_SIZE))
+ return NULL;
+ value_w = _wgetenv((const wchar_t *) varname_w);
+ if (!value_w)
+ return NULL;
+
+ if (!WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, value_w, -1, env_value, MAX_ENV_SIZE, NULL, NULL))
+ return NULL;
+
+ return env_value;
+}
2022-06-11 15:01:12 +02:00
+#endif
2024-11-29 08:21:08 +01:00
diff --git a/src/utf8_wrappers.h b/src/utf8_wrappers.h
new file mode 100644
index 0000000..22e853f
--- /dev/null
+++ b/src/utf8_wrappers.h
2023-04-07 19:11:01 +02:00
@@ -0,0 +1,46 @@
2022-06-11 15:01:12 +02:00
+/**
+ * Wrappers to provide Unicode (UTF-8) support on Windows.
+ *
+ * Copyright (c) 2018 Peter Wu <peter@lekensteyn.nl>
+ * SPDX-License-Identifier: (GPL-2.0-or-later OR MIT)
+ */
+
+#ifdef _WIN32
+
+#if defined(loadlib_c) || defined(lauxlib_c) || defined(liolib_c) || defined(luac_c)
+#include <stdio.h> /* for loadlib_c */
+FILE *fopen_utf8(const char *pathname, const char *mode);
+#define fopen fopen_utf8
+#endif
+
+#ifdef lauxlib_c
2022-11-16 04:23:45 +01:00
+#include <stdio.h>
2022-06-11 15:01:12 +02:00
+FILE *freopen_utf8(const char *pathname, const char *mode, FILE *stream);
+#define freopen freopen_utf8
+#endif
+
+#ifdef liolib_c
+FILE *popen_utf8(const char *command, const char *mode);
+#define _popen popen_utf8
+#endif
+
+#ifdef loslib_c
2022-11-16 04:23:45 +01:00
+#include <stdio.h>
2022-06-11 15:01:12 +02:00
+int remove_utf8(const char *pathname);
+int rename_utf8(const char *oldpath, const char *newpath);
+int system_utf8(const char *command);
2023-04-07 19:11:01 +02:00
+char *getenv_utf8(const char *varname);
2022-06-11 15:01:12 +02:00
+#define remove remove_utf8
+#define rename rename_utf8
+#define system system_utf8
2023-04-07 19:11:01 +02:00
+#define getenv getenv_utf8
2022-06-11 15:01:12 +02:00
+#endif
+
+#ifdef loadlib_c
+#include <windows.h>
+DWORD GetModuleFileNameA_utf8(HMODULE hModule, LPSTR lpFilename, DWORD nSize);
+HMODULE LoadLibraryExA_utf8(LPCSTR lpLibFileName, HANDLE hFile, DWORD dwFlags);
+#define GetModuleFileNameA GetModuleFileNameA_utf8
+#define LoadLibraryExA LoadLibraryExA_utf8
+#endif
+#endif
2024-11-29 08:21:08 +01:00
--
2.47.0