about summary refs log tree commit diff
diff options
context:
space:
mode:
author figec <raptor.ext@gmail.com> 2015-06-18 21:34:17 +0300
committer est31 <MTest31@outlook.com> 2015-06-18 23:39:23 +0200
commit 3b65a6a36c3e910359c69cd3e3e3fd89e50ba23e (patch)
tree bda855f40761027cff71191505df2131a0c8f63f
parent e45ecad3ab401e169d1c8c9ba9448360b215b3e5 (diff)
download minetest-3b65a6a36c3e910359c69cd3e3e3fd89e50ba23e.tar.gz
minetest-3b65a6a36c3e910359c69cd3e3e3fd89e50ba23e.tar.bz2
minetest-3b65a6a36c3e910359c69cd3e3e3fd89e50ba23e.zip
Fix wrap_rows at inner byte of multibyte sequence
Also fix the UTF-8 inner byte bounds and add a unit test for the case this fixes.
-rw-r--r-- src/unittest/test_utilities.cpp 24
-rw-r--r-- src/util/string.h 14
2 files changed, 28 insertions, 10 deletions
diff --git a/src/unittest/test_utilities.cpp b/src/unittest/test_utilities.cpp
index 9678a81eb..df90d37bd 100644
--- a/src/unittest/test_utilities.cpp
+++ b/src/unittest/test_utilities.cpp
@@ -243,13 +243,23 @@ void TestUtilities::testWrapRows()
{
UASSERT(wrap_rows("12345678",4) == "1234\n5678");
// test that wrap_rows doesn't wrap inside multibyte sequences
- const unsigned char s[] = {
- 0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
- 0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
- 0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
- 0x6e, 0x2f, 0x2e, 0x2e, 0};
- std::string str((char *)s);
- UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
+ {
+ const unsigned char s[] = {
+ 0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
+ 0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
+ 0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
+ 0x6e, 0x2f, 0x2e, 0x2e, 0};
+ std::string str((char *)s);
+ UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
+ };
+ {
+ const unsigned char s[] = {
+ 0x74, 0x65, 0x73, 0x74, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81,
+ 0xd1, 0x82, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82,
+ 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0};
+ std::string str((char *)s);
+ UASSERT(utf8_to_wide(wrap_rows(str, 8)) != L"<invalid UTF-8 string>");
+ }
}
diff --git a/src/util/string.h b/src/util/string.h
index 72d3c6075..b4ce5743d 100644
--- a/src/util/string.h
+++ b/src/util/string.h
@@ -33,7 +33,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#define TOSTRING(x) STRINGIFY(x)
// Checks whether a byte is an inner byte for an utf-8 multibyte sequence
-#define IS_UTF8_MULTB_INNER(x) (((unsigned char)x >= 0x80) && ((unsigned char)x <= 0xc0))
+#define IS_UTF8_MULTB_INNER(x) (((unsigned char)x >= 0x80) && ((unsigned char)x < 0xc0))
typedef std::map<std::string, std::string> StringMap;
@@ -426,12 +426,20 @@ inline std::string wrap_rows(const std::string &from,
{
std::string to;
+ bool need_to_wrap = false;
+
size_t character_idx = 0;
for (size_t i = 0; i < from.size(); i++) {
if (character_idx > 0 && character_idx % row_len == 0)
- to += '\n';
- if (!IS_UTF8_MULTB_INNER(from[i]))
+ need_to_wrap = true;
+ if (!IS_UTF8_MULTB_INNER(from[i])) {
+ // Wrap string if needed before next char started
+ if (need_to_wrap) {
+ to += '\n';
+ need_to_wrap = false;
+ }
character_idx++;
+ }
to += from[i];
}