From dae6fe91a1059751a0bde504cdf41a749234ce1a Mon Sep 17 00:00:00 2001 From: ShadowNinja Date: Mon, 31 Jan 2022 21:11:51 -0500 Subject: Update directory name sanitization Only ASCII spaces have to be handled specially, and leading spaces are also disallowed. --- src/unittest/test_utilities.cpp | 12 ++++++++---- src/util/string.cpp | 39 ++++++++++++++------------------------- src/util/string.h | 2 +- 3 files changed, 23 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/unittest/test_utilities.cpp b/src/unittest/test_utilities.cpp index 228a9559f..10ea8d36a 100644 --- a/src/unittest/test_utilities.cpp +++ b/src/unittest/test_utilities.cpp @@ -636,8 +636,12 @@ void TestUtilities::testBase64() void TestUtilities::testSanitizeDirName() { - UASSERT(sanitizeDirName("a", "_") == "a"); - UASSERT(sanitizeDirName("COM1", "_") == "_COM1"); - UASSERT(sanitizeDirName("cOm\u00B2 .txt:a", "_") == "cOm\u00B2 _txt_a"); - UASSERT(sanitizeDirName("cOnIn$ ", "_") == "_cOnIn$ "); + UASSERT(sanitizeDirName("a", "~") == "a"); + UASSERT(sanitizeDirName("  ", "~") == "__"); + UASSERT(sanitizeDirName(" a ", "~") == "_a_"); + UASSERT(sanitizeDirName("COM1", "~") == "~COM1"); + UASSERT(sanitizeDirName("COM1", ":") == "_COM1"); + UASSERT(sanitizeDirName("cOm\u00B2", "~") == "~cOm\u00B2"); + UASSERT(sanitizeDirName("cOnIn$", "~") == "~cOnIn$"); + UASSERT(sanitizeDirName(" cOnIn$ ", "~") == "_cOnIn$_"); } diff --git a/src/util/string.cpp b/src/util/string.cpp index 689f58f7f..39cd44667 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -864,40 +864,29 @@ static const std::array disallowed_dir_names = { static const std::wstring disallowed_path_chars = L"<>:\"/\\|?*."; -/** - * @param str - * @return A copy of \p str with trailing whitespace removed.
- */ -static std::wstring wrtrim(const std::wstring &str) -{ - size_t back = str.size(); - while (back > 0 && std::isspace(str[back - 1])) - --back; - - return str.substr(0, back); -} - - -/** - * Sanitize the name of a new directory. This consists of two stages: - * 1. Check for 'reserved filenames' that can't be used on some filesystems - * and add a prefix to them - * 2. Remove 'unsafe' characters from the name by replacing them with '_' - */ std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix) { std::wstring safe_name = utf8_to_wide(str); - std::wstring dev_name = wrtrim(safe_name); - for (std::wstring disallowed_name : disallowed_dir_names) { - if (str_equal(dev_name, disallowed_name, true)) { + if (str_equal(safe_name, disallowed_name, true)) { safe_name = utf8_to_wide(optional_prefix) + safe_name; break; } } - for (unsigned long i = 0; i < safe_name.length(); i++) { + // Replace leading and trailing spaces with underscores. + size_t start = safe_name.find_first_not_of(L' '); + size_t end = safe_name.find_last_not_of(L' '); + if (start == std::wstring::npos || end == std::wstring::npos) + start = end = safe_name.size(); + for (size_t i = 0; i < start; i++) + safe_name[i] = L'_'; + for (size_t i = end + 1; i < safe_name.size(); i++) + safe_name[i] = L'_'; + + // Replace other disallowed characters with underscores + for (size_t i = 0; i < safe_name.length(); i++) { bool is_valid = true; // Unlikely, but control characters should always be blacklisted @@ -909,7 +898,7 @@ std::string sanitizeDirName(const std::string &str, const std::string &optional_ } if (!is_valid) - safe_name[i] = '_'; + safe_name[i] = L'_'; } return wide_to_utf8(safe_name); diff --git a/src/util/string.h b/src/util/string.h index f4ca1a7de..d8ec633ee 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -749,7 +749,7 @@ inline irr::core::stringw utf8_to_stringw(const std::string &input) /** * Sanitize the name of a new directory. 
This consists of two stages: * 1. Check for 'reserved filenames' that can't be used on some filesystems - * and prefix them + * and add a prefix to them * 2. Remove 'unsafe' characters from the name by replacing them with '_' */ std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix); -- cgit v1.2.3