1 files changed, 95 insertions, 91 deletions
diff --git a/src/util/string.cpp b/src/util/string.cpp
index 6e1db798c..611ad35cb 100644
--- a/src/util/string.cpp
+++ b/src/util/string.cpp
@@ -27,6 +27,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #include "translation.h"
 
 #include <algorithm>
+#include <array>
 #include <sstream>
 #include <iomanip>
 #include <map>
@@ -49,8 +50,8 @@ static bool parseNamedColorString(const std::string &value, video::SColor &color
 
 #ifndef _WIN32
 
-bool convert(const char *to, const char *from, char *outbuf,
-		size_t outbuf_size, char *inbuf, size_t inbuf_size)
+static bool convert(const char *to, const char *from, char *outbuf,
+		size_t *outbuf_size, char *inbuf, size_t inbuf_size)
 {
 	iconv_t cd = iconv_open(to, from);
 
@@ -59,15 +60,14 @@ bool convert(const char *to, const char *from, char *outbuf,
 #else
 	char *inbuf_ptr = inbuf;
 #endif
-
 	char *outbuf_ptr = outbuf;
 
 	size_t *inbuf_left_ptr = &inbuf_size;
-	size_t *outbuf_left_ptr = &outbuf_size;
 
+	const size_t old_outbuf_size = *outbuf_size;
 	size_t old_size = inbuf_size;
 	while (inbuf_size > 0) {
-		iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_left_ptr);
+		iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size);
 		if (inbuf_size == old_size) {
 			iconv_close(cd);
 			return false;
@@ -76,11 +76,12 @@ bool convert(const char *to, const char *from, char *outbuf,
 	}
 
 	iconv_close(cd);
+	*outbuf_size = old_outbuf_size - *outbuf_size;
 	return true;
 }
 
 #ifdef __ANDROID__
-// Android need manual caring to support the full character set possible with wchar_t
+// On Android, iconv disagrees about how big a wchar_t is, so the encoding is named explicitly
 const char *DEFAULT_ENCODING = "UTF-32LE";
 #else
 const char *DEFAULT_ENCODING = "WCHAR_T";
@@ -88,58 +89,52 @@ const char *DEFAULT_ENCODING = "WCHAR_T";
 
 std::wstring utf8_to_wide(const std::string &input)
 {
-	size_t inbuf_size = input.length() + 1;
+	const size_t inbuf_size = input.length(); // size in bytes, terminator excluded
 	// maximum possible size, every character is sizeof(wchar_t) bytes
-	size_t outbuf_size = (input.length() + 1) * sizeof(wchar_t);
+	size_t outbuf_size = input.length() * sizeof(wchar_t); // in: capacity in bytes; convert() overwrites it with the bytes used
 
-	char *inbuf = new char[inbuf_size];
+	char *inbuf = new char[inbuf_size]; // mutable copy because convert() takes char*; intentionally NOT null-terminated
 	memcpy(inbuf, input.c_str(), inbuf_size);
-	char *outbuf = new char[outbuf_size];
-	memset(outbuf, 0, outbuf_size);
+	std::wstring out;
+	out.resize(outbuf_size / sizeof(wchar_t)); // worst case: one wchar_t per input byte
 
 #ifdef __ANDROID__
-	// Android need manual caring to support the full character set possible with wchar_t
 	SANITY_CHECK(sizeof(wchar_t) == 4);
 #endif
 
-	if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, outbuf_size, inbuf, inbuf_size)) {
+	char *outbuf = reinterpret_cast<char*>(&out[0]); // convert() writes straight into out's storage
+	if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
 		infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
 			<< " into wstring" << std::endl;
 		delete[] inbuf;
-		delete[] outbuf;
 		return L"<invalid UTF-8 string>";
 	}
-	std::wstring out((wchar_t *)outbuf);
-
 	delete[] inbuf;
-	delete[] outbuf;
 
+	out.resize(outbuf_size / sizeof(wchar_t)); // shrink to the number of wchar_t actually produced
 	return out;
 }
 
 std::string wide_to_utf8(const std::wstring &input)
 {
-	size_t inbuf_size = (input.length() + 1) * sizeof(wchar_t);
-	// maximum possible size: utf-8 encodes codepoints using 1 up to 6 bytes
-	size_t outbuf_size = (input.length() + 1) * 6;
+	const size_t inbuf_size = input.length() * sizeof(wchar_t); // size in bytes, terminator excluded
+	// maximum possible size: UTF-8 encodes a code point using 1 up to 4 bytes
+	size_t outbuf_size = input.length() * 4; // in: capacity in bytes; convert() overwrites it with the bytes used
 
-	char *inbuf = new char[inbuf_size];
+	char *inbuf = new char[inbuf_size]; // mutable copy because convert() takes char*; intentionally NOT null-terminated
 	memcpy(inbuf, input.c_str(), inbuf_size);
-	char *outbuf = new char[outbuf_size];
-	memset(outbuf, 0, outbuf_size);
+	std::string out;
+	out.resize(outbuf_size); // worst-case capacity, trimmed after the conversion
 
-	if (!convert("UTF-8", DEFAULT_ENCODING, outbuf, outbuf_size, inbuf, inbuf_size)) {
+	if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
 		infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
 			<< " into UTF-8 string" << std::endl;
 		delete[] inbuf;
-		delete[] outbuf;
-		return "<invalid wstring>";
+		return "<invalid wide string>";
 	}
-	std::string out(outbuf);
-
 	delete[] inbuf;
-	delete[] outbuf;
 
+	out.resize(outbuf_size); // shrink to the number of bytes actually produced
 	return out;
 }
 
@@ -171,74 +166,15 @@ std::string wide_to_utf8(const std::wstring &input)
 
 #endif // _WIN32
 
-// You must free the returned string!
-// The returned string is allocated using new
 wchar_t *utf8_to_wide_c(const char *str)
 {
 	std::wstring ret = utf8_to_wide(std::string(str));
 	size_t len = ret.length();
 	wchar_t *ret_c = new wchar_t[len + 1];
-	memset(ret_c, 0, (len + 1) * sizeof(wchar_t));
-	memcpy(ret_c, ret.c_str(), len * sizeof(wchar_t));
+	memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t)); // len + 1 copies the terminator too; caller must delete[] the result
 	return ret_c;
 }
 
-// You must free the returned string!
-// The returned string is allocated using new
-wchar_t *narrow_to_wide_c(const char *str)
-{
-	wchar_t *nstr = nullptr;
-#if defined(_WIN32)
-	int nResult = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, 0, 0);
-	if (nResult == 0) {
-		errorstream<<"gettext: MultiByteToWideChar returned null"<<std::endl;
-	} else {
-		nstr = new wchar_t[nResult];
-		MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, (WCHAR *) nstr, nResult);
-	}
-#else
-	size_t len = strlen(str);
-	nstr = new wchar_t[len + 1];
-
-	std::wstring intermediate = narrow_to_wide(str);
-	memset(nstr, 0, (len + 1) * sizeof(wchar_t));
-	memcpy(nstr, intermediate.c_str(), len * sizeof(wchar_t));
-#endif
-
-	return nstr;
-}
-
-std::wstring narrow_to_wide(const std::string &mbs) {
-#ifdef __ANDROID__
-	return utf8_to_wide(mbs);
-#else
-	size_t wcl = mbs.size();
-	Buffer<wchar_t> wcs(wcl + 1);
-	size_t len = mbstowcs(*wcs, mbs.c_str(), wcl);
-	if (len == (size_t)(-1))
-		return L"<invalid multibyte string>";
-	wcs[len] = 0;
-	return *wcs;
-#endif
-}
-
-
-std::string wide_to_narrow(const std::wstring &wcs)
-{
-#ifdef __ANDROID__
-	return wide_to_utf8(wcs);
-#else
-	size_t mbl = wcs.size() * 4;
-	SharedBuffer<char> mbs(mbl+1);
-	size_t len = wcstombs(*mbs, wcs.c_str(), mbl);
-	if (len == (size_t)(-1))
-		return "Character conversion failed!";
-
-	mbs[len] = 0;
-	return *mbs;
-#endif
-}
-
 
 std::string urlencode(const std::string &str)
 {
@@ -632,7 +568,7 @@ static bool parseNamedColorString(const std::string &value, video::SColor &color
 		color_name = value;
 	}
 
-	color_name = lowercase(value);
+	color_name = lowercase(color_name);
 
 	std::map<const std::string, unsigned>::const_iterator it;
 	it = named_colors.colors.find(color_name);
@@ -765,7 +701,8 @@ void translate_string(const std::wstring &s, Translations *translations,
 		} else {
 			// This is an escape sequence *inside* the template string to translate itself.
 			// This should not happen, show an error message.
-			errorstream << "Ignoring escape sequence '" << wide_to_narrow(escape_sequence) << "' in translation" << std::endl;
+			errorstream << "Ignoring escape sequence '"
+				<< wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
 		}
 	}
 
@@ -889,3 +826,70 @@ std::wstring translate_string(const std::wstring &s)
 	return translate_string(s, g_client_translations);
 #endif
 }
+
+static const std::array<std::wstring, 22> disallowed_dir_names = {
+	// Reserved names from Microsoft's file naming rules; sanitizeDirName() prefixes matches:
+	// https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
+	L"CON",
+	L"PRN",
+	L"AUX",
+	L"NUL",
+	L"COM1",
+	L"COM2",
+	L"COM3",
+	L"COM4",
+	L"COM5",
+	L"COM6",
+	L"COM7",
+	L"COM8",
+	L"COM9",
+	L"LPT1",
+	L"LPT2",
+	L"LPT3",
+	L"LPT4",
+	L"LPT5",
+	L"LPT6",
+	L"LPT7",
+	L"LPT8",
+	L"LPT9",
+};
+
+/**
+ * ASCII characters that sanitizeDirName() replaces with '_' in directory names
+ */
+static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*.";
+
+/**
+ * Sanitize the name of a new directory (input and result are UTF-8) in two stages:
+ * 1. If the name matches an entry of disallowed_dir_names, prepend optional_prefix
+ * 2. Replace control characters (< 32) and the ASCII characters listed in
+ *	disallowed_path_chars with '_'; values >= 128 are left untouched
+ */
+std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
+{
+	std::wstring safe_name = utf8_to_wide(str);
+
+	for (const std::wstring &disallowed_name : disallowed_dir_names) {
+		if (str_equal(safe_name, disallowed_name, true)) {
+			safe_name = utf8_to_wide(optional_prefix) + safe_name;
+			break;
+		}
+	}
+
+	// The prefix is already part of safe_name here, so it gets sanitized as well
+	for (wchar_t &c : safe_name) {
+		bool is_valid = true;
+
+		// Unlikely, but control characters should always be blacklisted
+		if (c < 32) {
+			is_valid = false;
+		} else if (c < 128) {
+			is_valid = disallowed_path_chars.find(c) == std::wstring::npos;
+		}
+
+		if (!is_valid)
+			c = L'_';
+	}
+
+	return wide_to_utf8(safe_name);
+}