Add utf-8 conversion utilities and re-add intlGUIEditBox

author: est31 <MTest31@outlook.com> 2015-06-10 00:35:21 +0200
committer: est31 <MTest31@outlook.com> 2015-06-13 19:49:55 +0200
commit: 572990dcd3f43386860b5b14015ad722fb96ea83 (patch)
tree: f1f1f57b612aec58e226bf43ad1e2a78441cdc4f /src/util
parent: aa13baa30a45b0f834c23bd5c0407895eb8ec0ee (diff)
download: minetest-572990dcd3f43386860b5b14015ad722fb96ea83.tar.gz
minetest-572990dcd3f43386860b5b14015ad722fb96ea83.tar.bz2
minetest-572990dcd3f43386860b5b14015ad722fb96ea83.zip
2 files changed, 103 insertions, 1 deletions
diff --git a/src/util/string.cpp b/src/util/string.cpp
index 231eaf6be..49aff4a1f 100644
--- a/src/util/string.cpp
+++ b/src/util/string.cpp
@@ -29,9 +29,103 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #include <iomanip>
 #include <map>
 
+#ifndef _WIN32
+#include <iconv.h>
+#else
+#define _WIN32_WINNT 0x0501
+#include <windows.h>
+#endif
+
 static bool parseHexColorString(const std::string &value, video::SColor &color);
 static bool parseNamedColorString(const std::string &value, video::SColor &color);
 
+#ifndef _WIN32
+// Converts inbuf_size bytes at inbuf from encoding `from` to encoding `to` via
+// iconv, writing at most outbuf_size bytes to outbuf. Returns 0 on success,
+// (size_t)-1 if iconv_open fails or iconv reports an error (bad input, outbuf full).
+size_t convert(const char *to, const char *from, char *outbuf,
+		size_t outbuf_size, char *inbuf, size_t inbuf_size)
+{
+	iconv_t cd = iconv_open(to, from);
+	if (cd == (iconv_t)-1)
+		return (size_t)-1;
+#if defined(__FreeBSD__) || defined(__FreeBSD)
+	const char *inbuf_ptr = inbuf;
+#else
+	char *inbuf_ptr = inbuf;
+#endif
+	char *outbuf_ptr = outbuf;
+	size_t result = 0;
+	// Stop at the first error: iconv makes no progress past it, so retrying would loop forever.
+	while (inbuf_size > 0 && result != (size_t)-1)
+		result = iconv(cd, &inbuf_ptr, &inbuf_size, &outbuf_ptr, &outbuf_size);
+	iconv_close(cd);
+	return result == (size_t)-1 ? result : 0;
+}
+
+// Converts a UTF-8 string to a wide string using iconv ("UTF-8" -> "WCHAR_T").
+// The result ends at the first NUL character in the converted output, so
+// input containing embedded NULs is truncated there.
+std::wstring utf8_to_wide(const std::string &input)
+{
+	// Owning string buffers replace new[]/delete[] so nothing leaks if an
+	// allocation throws part-way through.
+	// iconv needs a mutable input pointer, so work on a copy that includes
+	// the terminating NUL.
+	std::string inbuf(input.c_str(), input.length() + 1);
+	// Maximum possible size: one wchar_t per input byte, plus the terminator.
+	// Zero-filled so the converted text is always NUL-terminated.
+	std::wstring outbuf(input.length() + 1, L'\0');
+
+	convert("WCHAR_T", "UTF-8", (char *)&outbuf[0],
+			outbuf.size() * sizeof(wchar_t), &inbuf[0], inbuf.size());
+
+	return std::wstring(outbuf.c_str());
+}
+
+// Converts a wide string to UTF-8 using iconv ("WCHAR_T" -> "UTF-8").
+// The result ends at the first NUL byte in the converted output, so input
+// containing embedded NULs is truncated there.
+std::string wide_to_utf8(const std::wstring &input)
+{
+	// Owning string buffers replace new[]/delete[] so nothing leaks if an
+	// allocation throws part-way through.
+	// iconv needs a mutable input pointer, so work on a copy that includes
+	// the terminating NUL.
+	std::wstring inbuf(input.c_str(), input.length() + 1);
+	// Maximum possible size: utf-8 encodes codepoints using 1 up to 6 bytes.
+	// Zero-filled so the converted text is always NUL-terminated.
+	std::string outbuf((input.length() + 1) * 6, '\0');
+
+	convert("UTF-8", "WCHAR_T", &outbuf[0], outbuf.size(),
+			(char *)&inbuf[0], inbuf.size() * sizeof(wchar_t));
+
+	return std::string(outbuf.c_str());
+}
+#else
+// Converts UTF-8 to a wide string via MultiByteToWideChar(CP_UTF8); the
+// result ends at the first NUL character of the converted output.
+std::wstring utf8_to_wide(const std::string &input)
+{
+	// Owning, zero-filled buffer (one wchar_t per input byte plus terminator)
+	// instead of new[]/delete[], so nothing leaks if an allocation throws.
+	std::wstring outbuf(input.size() + 1, L'\0');
+	MultiByteToWideChar(CP_UTF8, 0, input.c_str(), input.size(), &outbuf[0], outbuf.size());
+	return std::wstring(outbuf.c_str());
+}
+
+// Converts a wide string to UTF-8 via WideCharToMultiByte(CP_UTF8); the
+// result ends at the first NUL byte of the converted output.
+std::string wide_to_utf8(const std::wstring &input)
+{
+	// Owning, zero-filled buffer (up to 6 bytes per wchar_t plus terminator)
+	// instead of new[]/delete[], so nothing leaks if an allocation throws.
+	std::string outbuf((input.size() + 1) * 6, '\0');
+	WideCharToMultiByte(CP_UTF8, 0, input.c_str(), input.size(), &outbuf[0], outbuf.size(), NULL, NULL);
+	return std::string(outbuf.c_str());
+}
+#endif
+
 
 // You must free the returned string!
 // The returned string is allocated using new
diff --git a/src/util/string.h b/src/util/string.h
index b80e3c9a8..5bf2b5b7c 100644
--- a/src/util/string.h
+++ b/src/util/string.h
@@ -39,10 +39,18 @@ struct FlagDesc {
 	u32 flag;
 };
 
+// Avoid converting between wide and UTF-8 encodings; this can result in data loss.
+// Convert between them only when you need to input/output text via Irrlicht.
+std::wstring utf8_to_wide(const std::string &input);
+std::string wide_to_utf8(const std::wstring &input);
+
+// NEVER use the following functions unless you have a VERY GOOD reason to.
+// They just convert between wide and multibyte encoding; the multibyte
+// encoding depends on the current locale, which is no good, especially on Windows.
+
 // You must free the returned string!
 // The returned string is allocated using new
 wchar_t *narrow_to_wide_c(const char *str);
-
 std::wstring narrow_to_wide(const std::string &mbs);
 std::string wide_to_narrow(const std::wstring &wcs);
author	est31 <MTest31@outlook.com>	2015-06-10 00:35:21 +0200
committer	est31 <MTest31@outlook.com>	2015-06-13 19:49:55 +0200
commit	572990dcd3f43386860b5b14015ad722fb96ea83 (patch)
tree	f1f1f57b612aec58e226bf43ad1e2a78441cdc4f /src/util
parent	aa13baa30a45b0f834c23bd5c0407895eb8ec0ee (diff)
download	minetest-572990dcd3f43386860b5b14015ad722fb96ea83.tar.gz minetest-572990dcd3f43386860b5b14015ad722fb96ea83.tar.bz2 minetest-572990dcd3f43386860b5b14015ad722fb96ea83.zip