From 3b65a6a36c3e910359c69cd3e3e3fd89e50ba23e Mon Sep 17 00:00:00 2001
From: figec <raptor.ext@gmail.com>
Date: Thu, 18 Jun 2015 21:34:17 +0300
Subject: Fix wrap_rows at inner byte of multibyte sequence

Also fix the UTF-8 inner-byte bounds check and add a unit test for the case this fixes.
---
 src/unittest/test_utilities.cpp | 24 +++++++++++++++++-------
 src/util/string.h               | 14 +++++++++++---
 2 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'src')

diff --git a/src/unittest/test_utilities.cpp b/src/unittest/test_utilities.cpp
index 9678a81eb..df90d37bd 100644
--- a/src/unittest/test_utilities.cpp
+++ b/src/unittest/test_utilities.cpp
@@ -243,13 +243,23 @@ void TestUtilities::testWrapRows()
 {
 	UASSERT(wrap_rows("12345678",4) == "1234\n5678");
 	// test that wrap_rows doesn't wrap inside multibyte sequences
-	const unsigned char s[] = {
-		0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
-		0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
-		0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
-		0x6e, 0x2f, 0x2e, 0x2e, 0};
-	std::string str((char *)s);
-	UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
+	{
+		const unsigned char s[] = {
+			0x2f, 0x68, 0x6f, 0x6d, 0x65, 0x2f, 0x72, 0x61, 0x70, 0x74, 0x6f,
+			0x72, 0x2f, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0x2f,
+			0x6d, 0x69, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x74, 0x2f, 0x62, 0x69,
+			0x6e, 0x2f, 0x2e, 0x2e, 0};
+		std::string str((char *)s);
+		UASSERT(utf8_to_wide(wrap_rows(str, 20)) != L"<invalid UTF-8 string>");
+	};
+	{
+		const unsigned char s[] = {
+			0x74, 0x65, 0x73, 0x74, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81,
+			0xd1, 0x82, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82,
+			0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0};
+		std::string str((char *)s);
+		UASSERT(utf8_to_wide(wrap_rows(str, 8)) != L"<invalid UTF-8 string>");
+	}
 }
 
 
diff --git a/src/util/string.h b/src/util/string.h
index 72d3c6075..b4ce5743d 100644
--- a/src/util/string.h
+++ b/src/util/string.h
@@ -33,7 +33,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 #define TOSTRING(x) STRINGIFY(x)
 
 // Checks whether a byte is an inner byte for an utf-8 multibyte sequence
-#define IS_UTF8_MULTB_INNER(x) (((unsigned char)x >= 0x80) && ((unsigned char)x <= 0xc0))
+#define IS_UTF8_MULTB_INNER(x) (((unsigned char)x >= 0x80) && ((unsigned char)x < 0xc0))
 
 typedef std::map<std::string, std::string> StringMap;
 
@@ -426,12 +426,20 @@ inline std::string wrap_rows(const std::string &from,
 {
 	std::string to;
 
+	bool need_to_wrap = false;
+
 	size_t character_idx = 0;
 	for (size_t i = 0; i < from.size(); i++) {
 		if (character_idx > 0 && character_idx % row_len == 0)
-			to += '\n';
-		if (!IS_UTF8_MULTB_INNER(from[i]))
+			need_to_wrap = true;
+		if (!IS_UTF8_MULTB_INNER(from[i])) {
+			// Wrap string if needed before next char started
+			if (need_to_wrap) {
+				to += '\n';
+				need_to_wrap = false;
+			}
 			character_idx++;
+		}
 		to += from[i];
 	}
 
-- 
cgit v1.2.3