diff options
Diffstat (limited to 'src/util/string.cpp')
-rw-r--r-- | src/util/string.cpp | 186 |
1 files changed, 95 insertions, 91 deletions
diff --git a/src/util/string.cpp b/src/util/string.cpp index 6e1db798c..611ad35cb 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -27,6 +27,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "translation.h" #include <algorithm> +#include <array> #include <sstream> #include <iomanip> #include <map> @@ -49,8 +50,8 @@ static bool parseNamedColorString(const std::string &value, video::SColor &color #ifndef _WIN32 -bool convert(const char *to, const char *from, char *outbuf, - size_t outbuf_size, char *inbuf, size_t inbuf_size) +static bool convert(const char *to, const char *from, char *outbuf, + size_t *outbuf_size, char *inbuf, size_t inbuf_size) { iconv_t cd = iconv_open(to, from); @@ -59,15 +60,14 @@ bool convert(const char *to, const char *from, char *outbuf, #else char *inbuf_ptr = inbuf; #endif - char *outbuf_ptr = outbuf; size_t *inbuf_left_ptr = &inbuf_size; - size_t *outbuf_left_ptr = &outbuf_size; + const size_t old_outbuf_size = *outbuf_size; size_t old_size = inbuf_size; while (inbuf_size > 0) { - iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_left_ptr); + iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size); if (inbuf_size == old_size) { iconv_close(cd); return false; @@ -76,11 +76,12 @@ bool convert(const char *to, const char *from, char *outbuf, } iconv_close(cd); + *outbuf_size = old_outbuf_size - *outbuf_size; return true; } #ifdef __ANDROID__ -// Android need manual caring to support the full character set possible with wchar_t +// On Android iconv disagrees how big a wchar_t is for whatever reason const char *DEFAULT_ENCODING = "UTF-32LE"; #else const char *DEFAULT_ENCODING = "WCHAR_T"; @@ -88,58 +89,52 @@ const char *DEFAULT_ENCODING = "WCHAR_T"; std::wstring utf8_to_wide(const std::string &input) { - size_t inbuf_size = input.length() + 1; + const size_t inbuf_size = input.length(); // maximum possible size, every character is sizeof(wchar_t) bytes - size_t outbuf_size = (input.length() + 1) * sizeof(wchar_t); + size_t outbuf_size = input.length() * sizeof(wchar_t); - char *inbuf = new char[inbuf_size]; + char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated memcpy(inbuf, input.c_str(), inbuf_size); - char *outbuf = new char[outbuf_size]; - memset(outbuf, 0, outbuf_size); + std::wstring out; + out.resize(outbuf_size / sizeof(wchar_t)); #ifdef __ANDROID__ - // Android need manual caring to support the full character set possible with wchar_t SANITY_CHECK(sizeof(wchar_t) == 4); #endif - if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, outbuf_size, inbuf, inbuf_size)) { + char *outbuf = reinterpret_cast<char*>(&out[0]); + if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) { infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input) << " into wstring" << std::endl; delete[] inbuf; - delete[] outbuf; return L"<invalid UTF-8 string>"; } - std::wstring out((wchar_t *)outbuf); - delete[] inbuf; - delete[] outbuf; + out.resize(outbuf_size / sizeof(wchar_t)); return out; } std::string wide_to_utf8(const std::wstring &input) { - size_t inbuf_size = (input.length() + 1) * sizeof(wchar_t); - // maximum possible size: utf-8 encodes codepoints using 1 up to 6 bytes - size_t outbuf_size = (input.length() + 1) * 6; + const size_t inbuf_size = input.length() * sizeof(wchar_t); + // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes + size_t outbuf_size = input.length() * 4; - char *inbuf = new char[inbuf_size]; + char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated memcpy(inbuf, input.c_str(), inbuf_size); - char *outbuf = new char[outbuf_size]; - memset(outbuf, 0, outbuf_size); + std::string out; + out.resize(outbuf_size); - if (!convert("UTF-8", DEFAULT_ENCODING, outbuf, outbuf_size, inbuf, inbuf_size)) { + if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) { infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size) << " into UTF-8 string" << std::endl; delete[] inbuf; - delete[] outbuf; - return "<invalid wstring>"; + return "<invalid wide string>"; } - std::string out(outbuf); - delete[] inbuf; - delete[] outbuf; + out.resize(outbuf_size); return out; } @@ -171,74 +166,15 @@ std::string wide_to_utf8(const std::wstring &input) #endif // _WIN32 -// You must free the returned string! -// The returned string is allocated using new wchar_t *utf8_to_wide_c(const char *str) { std::wstring ret = utf8_to_wide(std::string(str)); size_t len = ret.length(); wchar_t *ret_c = new wchar_t[len + 1]; - memset(ret_c, 0, (len + 1) * sizeof(wchar_t)); - memcpy(ret_c, ret.c_str(), len * sizeof(wchar_t)); + memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t)); return ret_c; } -// You must free the returned string! -// The returned string is allocated using new -wchar_t *narrow_to_wide_c(const char *str) -{ - wchar_t *nstr = nullptr; -#if defined(_WIN32) - int nResult = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, 0, 0); - if (nResult == 0) { - errorstream<<"gettext: MultiByteToWideChar returned null"<<std::endl; - } else { - nstr = new wchar_t[nResult]; - MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, (WCHAR *) nstr, nResult); - } -#else - size_t len = strlen(str); - nstr = new wchar_t[len + 1]; - - std::wstring intermediate = narrow_to_wide(str); - memset(nstr, 0, (len + 1) * sizeof(wchar_t)); - memcpy(nstr, intermediate.c_str(), len * sizeof(wchar_t)); -#endif - - return nstr; -} - -std::wstring narrow_to_wide(const std::string &mbs) { -#ifdef __ANDROID__ - return utf8_to_wide(mbs); -#else - size_t wcl = mbs.size(); - Buffer<wchar_t> wcs(wcl + 1); - size_t len = mbstowcs(*wcs, mbs.c_str(), wcl); - if (len == (size_t)(-1)) - return L"<invalid multibyte string>"; - wcs[len] = 0; - return *wcs; -#endif -} - - -std::string wide_to_narrow(const std::wstring &wcs) -{ -#ifdef __ANDROID__ - return wide_to_utf8(wcs); -#else - size_t mbl = wcs.size() * 4; - SharedBuffer<char> mbs(mbl+1); - size_t len = wcstombs(*mbs, wcs.c_str(), mbl); - if (len == (size_t)(-1)) - return "Character conversion failed!"; - - mbs[len] = 0; - return *mbs; -#endif -} - std::string urlencode(const std::string &str) { @@ -632,7 +568,7 @@ static bool parseNamedColorString(const std::string &value, video::SColor &color color_name = value; } - color_name = lowercase(value); + color_name = lowercase(color_name); std::map<const std::string, unsigned>::const_iterator it; it = named_colors.colors.find(color_name); @@ -765,7 +701,8 @@ void translate_string(const std::wstring &s, Translations *translations, } else { // This is an escape sequence *inside* the template string to translate itself. // This should not happen, show an error message. - errorstream << "Ignoring escape sequence '" << wide_to_narrow(escape_sequence) << "' in translation" << std::endl; + errorstream << "Ignoring escape sequence '" + << wide_to_utf8(escape_sequence) << "' in translation" << std::endl; } } @@ -889,3 +826,70 @@ std::wstring translate_string(const std::wstring &s) return translate_string(s, g_client_translations); #endif } + +static const std::array<std::wstring, 22> disallowed_dir_names = { + // Problematic filenames from here: + // https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names + L"CON", + L"PRN", + L"AUX", + L"NUL", + L"COM1", + L"COM2", + L"COM3", + L"COM4", + L"COM5", + L"COM6", + L"COM7", + L"COM8", + L"COM9", + L"LPT1", + L"LPT2", + L"LPT3", + L"LPT4", + L"LPT5", + L"LPT6", + L"LPT7", + L"LPT8", + L"LPT9", +}; + +/** + * List of characters that are blacklisted from created directories + */ +static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*."; + +/** + * Sanitize the name of a new directory. This consists of two stages: + * 1. Check for 'reserved filenames' that can't be used on some filesystems + * and add a prefix to them + * 2. Remove 'unsafe' characters from the name by replacing them with '_' + */ +std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix) +{ + std::wstring safe_name = utf8_to_wide(str); + + for (std::wstring disallowed_name : disallowed_dir_names) { + if (str_equal(safe_name, disallowed_name, true)) { + safe_name = utf8_to_wide(optional_prefix) + safe_name; + break; + } + } + + for (unsigned long i = 0; i < safe_name.length(); i++) { + bool is_valid = true; + + // Unlikely, but control characters should always be blacklisted + if (safe_name[i] < 32) { + is_valid = false; + } else if (safe_name[i] < 128) { + is_valid = disallowed_path_chars.find_first_of(safe_name[i]) + == std::wstring::npos; + } + + if (!is_valid) + safe_name[i] = '_'; + } + + return wide_to_utf8(safe_name); +} |