summaryrefslogtreecommitdiff
path: root/src/util/string.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/string.cpp')
-rw-r--r--src/util/string.cpp186
1 files changed, 95 insertions, 91 deletions
diff --git a/src/util/string.cpp b/src/util/string.cpp
index 6e1db798c..611ad35cb 100644
--- a/src/util/string.cpp
+++ b/src/util/string.cpp
@@ -27,6 +27,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "translation.h"
#include <algorithm>
+#include <array>
#include <sstream>
#include <iomanip>
#include <map>
@@ -49,8 +50,8 @@ static bool parseNamedColorString(const std::string &value, video::SColor &color
#ifndef _WIN32
-bool convert(const char *to, const char *from, char *outbuf,
- size_t outbuf_size, char *inbuf, size_t inbuf_size)
+static bool convert(const char *to, const char *from, char *outbuf,
+ size_t *outbuf_size, char *inbuf, size_t inbuf_size)
{
iconv_t cd = iconv_open(to, from);
@@ -59,15 +60,14 @@ bool convert(const char *to, const char *from, char *outbuf,
#else
char *inbuf_ptr = inbuf;
#endif
-
char *outbuf_ptr = outbuf;
size_t *inbuf_left_ptr = &inbuf_size;
- size_t *outbuf_left_ptr = &outbuf_size;
+ const size_t old_outbuf_size = *outbuf_size;
size_t old_size = inbuf_size;
while (inbuf_size > 0) {
- iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_left_ptr);
+ iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size);
if (inbuf_size == old_size) {
iconv_close(cd);
return false;
@@ -76,11 +76,12 @@ bool convert(const char *to, const char *from, char *outbuf,
}
iconv_close(cd);
+ *outbuf_size = old_outbuf_size - *outbuf_size;
return true;
}
#ifdef __ANDROID__
-// Android need manual caring to support the full character set possible with wchar_t
+// On Android iconv disagrees how big a wchar_t is for whatever reason
const char *DEFAULT_ENCODING = "UTF-32LE";
#else
const char *DEFAULT_ENCODING = "WCHAR_T";
@@ -88,58 +89,52 @@ const char *DEFAULT_ENCODING = "WCHAR_T";
std::wstring utf8_to_wide(const std::string &input)
{
- size_t inbuf_size = input.length() + 1;
+ const size_t inbuf_size = input.length();
// maximum possible size, every character is sizeof(wchar_t) bytes
- size_t outbuf_size = (input.length() + 1) * sizeof(wchar_t);
+ size_t outbuf_size = input.length() * sizeof(wchar_t);
- char *inbuf = new char[inbuf_size];
+ char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
memcpy(inbuf, input.c_str(), inbuf_size);
- char *outbuf = new char[outbuf_size];
- memset(outbuf, 0, outbuf_size);
+ std::wstring out;
+ out.resize(outbuf_size / sizeof(wchar_t));
#ifdef __ANDROID__
- // Android need manual caring to support the full character set possible with wchar_t
SANITY_CHECK(sizeof(wchar_t) == 4);
#endif
- if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, outbuf_size, inbuf, inbuf_size)) {
+ char *outbuf = reinterpret_cast<char*>(&out[0]);
+ if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
<< " into wstring" << std::endl;
delete[] inbuf;
- delete[] outbuf;
return L"<invalid UTF-8 string>";
}
- std::wstring out((wchar_t *)outbuf);
-
delete[] inbuf;
- delete[] outbuf;
+ out.resize(outbuf_size / sizeof(wchar_t));
return out;
}
std::string wide_to_utf8(const std::wstring &input)
{
- size_t inbuf_size = (input.length() + 1) * sizeof(wchar_t);
- // maximum possible size: utf-8 encodes codepoints using 1 up to 6 bytes
- size_t outbuf_size = (input.length() + 1) * 6;
+ const size_t inbuf_size = input.length() * sizeof(wchar_t);
+ // maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
+ size_t outbuf_size = input.length() * 4;
- char *inbuf = new char[inbuf_size];
+ char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
memcpy(inbuf, input.c_str(), inbuf_size);
- char *outbuf = new char[outbuf_size];
- memset(outbuf, 0, outbuf_size);
+ std::string out;
+ out.resize(outbuf_size);
- if (!convert("UTF-8", DEFAULT_ENCODING, outbuf, outbuf_size, inbuf, inbuf_size)) {
+ if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
<< " into UTF-8 string" << std::endl;
delete[] inbuf;
- delete[] outbuf;
- return "<invalid wstring>";
+ return "<invalid wide string>";
}
- std::string out(outbuf);
-
delete[] inbuf;
- delete[] outbuf;
+ out.resize(outbuf_size);
return out;
}
@@ -171,74 +166,15 @@ std::string wide_to_utf8(const std::wstring &input)
#endif // _WIN32
-// You must free the returned string!
-// The returned string is allocated using new
wchar_t *utf8_to_wide_c(const char *str)
{
std::wstring ret = utf8_to_wide(std::string(str));
size_t len = ret.length();
wchar_t *ret_c = new wchar_t[len + 1];
- memset(ret_c, 0, (len + 1) * sizeof(wchar_t));
- memcpy(ret_c, ret.c_str(), len * sizeof(wchar_t));
+ memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
return ret_c;
}
-// You must free the returned string!
-// The returned string is allocated using new
-wchar_t *narrow_to_wide_c(const char *str)
-{
- wchar_t *nstr = nullptr;
-#if defined(_WIN32)
- int nResult = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, 0, 0);
- if (nResult == 0) {
- errorstream<<"gettext: MultiByteToWideChar returned null"<<std::endl;
- } else {
- nstr = new wchar_t[nResult];
- MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) str, -1, (WCHAR *) nstr, nResult);
- }
-#else
- size_t len = strlen(str);
- nstr = new wchar_t[len + 1];
-
- std::wstring intermediate = narrow_to_wide(str);
- memset(nstr, 0, (len + 1) * sizeof(wchar_t));
- memcpy(nstr, intermediate.c_str(), len * sizeof(wchar_t));
-#endif
-
- return nstr;
-}
-
-std::wstring narrow_to_wide(const std::string &mbs) {
-#ifdef __ANDROID__
- return utf8_to_wide(mbs);
-#else
- size_t wcl = mbs.size();
- Buffer<wchar_t> wcs(wcl + 1);
- size_t len = mbstowcs(*wcs, mbs.c_str(), wcl);
- if (len == (size_t)(-1))
- return L"<invalid multibyte string>";
- wcs[len] = 0;
- return *wcs;
-#endif
-}
-
-
-std::string wide_to_narrow(const std::wstring &wcs)
-{
-#ifdef __ANDROID__
- return wide_to_utf8(wcs);
-#else
- size_t mbl = wcs.size() * 4;
- SharedBuffer<char> mbs(mbl+1);
- size_t len = wcstombs(*mbs, wcs.c_str(), mbl);
- if (len == (size_t)(-1))
- return "Character conversion failed!";
-
- mbs[len] = 0;
- return *mbs;
-#endif
-}
-
std::string urlencode(const std::string &str)
{
@@ -632,7 +568,7 @@ static bool parseNamedColorString(const std::string &value, video::SColor &color
color_name = value;
}
- color_name = lowercase(value);
+ color_name = lowercase(color_name);
std::map<const std::string, unsigned>::const_iterator it;
it = named_colors.colors.find(color_name);
@@ -765,7 +701,8 @@ void translate_string(const std::wstring &s, Translations *translations,
} else {
// This is an escape sequence *inside* the template string to translate itself.
// This should not happen, show an error message.
- errorstream << "Ignoring escape sequence '" << wide_to_narrow(escape_sequence) << "' in translation" << std::endl;
+ errorstream << "Ignoring escape sequence '"
+ << wide_to_utf8(escape_sequence) << "' in translation" << std::endl;
}
}
@@ -889,3 +826,70 @@ std::wstring translate_string(const std::wstring &s)
return translate_string(s, g_client_translations);
#endif
}
+
+static const std::array<std::wstring, 22> disallowed_dir_names = {
+ // Problematic filenames from here:
+ // https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#file-and-directory-names
+ L"CON",
+ L"PRN",
+ L"AUX",
+ L"NUL",
+ L"COM1",
+ L"COM2",
+ L"COM3",
+ L"COM4",
+ L"COM5",
+ L"COM6",
+ L"COM7",
+ L"COM8",
+ L"COM9",
+ L"LPT1",
+ L"LPT2",
+ L"LPT3",
+ L"LPT4",
+ L"LPT5",
+ L"LPT6",
+ L"LPT7",
+ L"LPT8",
+ L"LPT9",
+};
+
+/**
+ * List of characters that are blacklisted from created directories
+ */
+static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*.";
+
+/**
+ * Sanitize the name of a new directory. This consists of two stages:
+ * 1. Check for 'reserved filenames' that can't be used on some filesystems
+ * and add a prefix to them
+ * 2. Remove 'unsafe' characters from the name by replacing them with '_'
+ */
+std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix)
+{
+ std::wstring safe_name = utf8_to_wide(str);
+
+ for (std::wstring disallowed_name : disallowed_dir_names) {
+ if (str_equal(safe_name, disallowed_name, true)) {
+ safe_name = utf8_to_wide(optional_prefix) + safe_name;
+ break;
+ }
+ }
+
+ for (unsigned long i = 0; i < safe_name.length(); i++) {
+ bool is_valid = true;
+
+ // Unlikely, but control characters should always be blacklisted
+ if (safe_name[i] < 32) {
+ is_valid = false;
+ } else if (safe_name[i] < 128) {
+ is_valid = disallowed_path_chars.find_first_of(safe_name[i])
+ == std::wstring::npos;
+ }
+
+ if (!is_valid)
+ safe_name[i] = '_';
+ }
+
+ return wide_to_utf8(safe_name);
+}