summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsfan5 <sfan5@live.de>2022-05-01 13:44:42 +0200
committersfan5 <sfan5@live.de>2022-05-21 17:46:10 +0200
commit9ee3dc71f1bb094c04afe98832f69f2a95995340 (patch)
treec91432d8d0edbce44f126fe220f11ec76f3bb365
parent70dc23f996683a59dd85db74e8f15e49c4f7b9fd (diff)
downloadminetest-9ee3dc71f1bb094c04afe98832f69f2a95995340.tar.gz
minetest-9ee3dc71f1bb094c04afe98832f69f2a95995340.tar.bz2
minetest-9ee3dc71f1bb094c04afe98832f69f2a95995340.zip
Optimize JSON string (de)serialization routines
stringstreams were shown to be slow when reading/writing single characters and there is lots of potential by having functions perform on existing buffers whenever possible.
-rw-r--r--src/util/serialize.cpp217
1 files changed, 113 insertions, 104 deletions
diff --git a/src/util/serialize.cpp b/src/util/serialize.cpp
index 32e0a8936..ee46fd941 100644
--- a/src/util/serialize.cpp
+++ b/src/util/serialize.cpp
@@ -18,15 +18,14 @@ with this program; if not, write to the Free Software Foundation, Inc.,
*/
#include "serialize.h"
-#include "pointer.h"
#include "porting.h"
#include "util/string.h"
+#include "util/hex.h"
#include "exceptions.h"
#include "irrlichttypes.h"
-#include <sstream>
-#include <iomanip>
-#include <vector>
+#include <iostream>
+#include <cassert>
FloatType g_serialize_f32_type = FLOATTYPE_UNKNOWN;
@@ -120,118 +119,148 @@ std::string deSerializeString32(std::istream &is)
}
////
-//// JSON
+//// JSON-like strings
////
std::string serializeJsonString(const std::string &plain)
{
- std::ostringstream os(std::ios::binary);
- os << "\"";
+ std::string tmp;
+
+ tmp.reserve(plain.size() + 2);
+ tmp.push_back('"');
for (char c : plain) {
switch (c) {
case '"':
- os << "\\\"";
+ tmp.append("\\\"");
break;
case '\\':
- os << "\\\\";
+ tmp.append("\\\\");
break;
case '\b':
- os << "\\b";
+ tmp.append("\\b");
break;
case '\f':
- os << "\\f";
+ tmp.append("\\f");
break;
case '\n':
- os << "\\n";
+ tmp.append("\\n");
break;
case '\r':
- os << "\\r";
+ tmp.append("\\r");
break;
case '\t':
- os << "\\t";
+ tmp.append("\\t");
break;
default: {
if (c >= 32 && c <= 126) {
- os << c;
+ tmp.push_back(c);
} else {
- u32 cnum = (u8)c;
- os << "\\u" << std::hex << std::setw(4)
- << std::setfill('0') << cnum;
+ // We pretend that Unicode codepoints map to bytes (they don't)
+ u8 cnum = static_cast<u8>(c);
+ tmp.append("\\u00");
+ tmp.push_back(hex_chars[cnum >> 4]);
+ tmp.push_back(hex_chars[cnum & 0xf]);
}
break;
}
}
}
- os << "\"";
- return os.str();
+ tmp.push_back('"');
+ return tmp;
+}
+
+static void deSerializeJsonString(std::string &s)
+{
+ assert(s.size() >= 2);
+ assert(s.front() == '"' && s.back() == '"');
+
+ size_t w = 0; // write index
+ size_t i = 1; // read index
+ const size_t len = s.size() - 1; // string length with trailing quote removed
+
+ while (i < len) {
+ char c = s[i++];
+ assert(c != '"');
+
+ if (c != '\\') {
+ s[w++] = c;
+ continue;
+ }
+
+ if (i >= len)
+ throw SerializationError("JSON string ended prematurely");
+ char c2 = s[i++];
+ switch (c2) {
+ case 'b':
+ s[w++] = '\b';
+ break;
+ case 'f':
+ s[w++] = '\f';
+ break;
+ case 'n':
+ s[w++] = '\n';
+ break;
+ case 'r':
+ s[w++] = '\r';
+ break;
+ case 't':
+ s[w++] = '\t';
+ break;
+ case 'u': {
+ if (i + 3 >= len)
+ throw SerializationError("JSON string ended prematurely");
+ unsigned char v[4] = {};
+ for (int j = 0; j < 4; j++)
+ hex_digit_decode(s[i+j], v[j]);
+ i += 4;
+ u32 hexnumber = (v[0] << 12) | (v[1] << 8) | (v[2] << 4) | v[3];
+ // Note that this does not work for anything other than ASCII
+ // but these functions do not actually interact with real JSON input.
+ s[w++] = (int) hexnumber;
+ break;
+ }
+ default:
+ s[w++] = c2;
+ break;
+ }
+ }
+
+ assert(w <= i && i <= len);
+ // Truncate string to current write index
+ s.resize(w);
}
std::string deSerializeJsonString(std::istream &is)
{
- std::ostringstream os(std::ios::binary);
- char c, c2;
+ std::string tmp;
+ char c;
+ bool was_backslash = false;
// Parse initial doublequote
- is >> c;
+ c = is.get();
if (c != '"')
throw SerializationError("JSON string must start with doublequote");
+ tmp.push_back(c);
- // Parse characters
+ // Grab the entire json string
for (;;) {
c = is.get();
if (is.eof())
throw SerializationError("JSON string ended prematurely");
- if (c == '"') {
- return os.str();
- }
-
- if (c == '\\') {
- c2 = is.get();
- if (is.eof())
- throw SerializationError("JSON string ended prematurely");
- switch (c2) {
- case 'b':
- os << '\b';
- break;
- case 'f':
- os << '\f';
- break;
- case 'n':
- os << '\n';
- break;
- case 'r':
- os << '\r';
- break;
- case 't':
- os << '\t';
- break;
- case 'u': {
- int hexnumber;
- char hexdigits[4 + 1];
-
- is.read(hexdigits, 4);
- if (is.eof())
- throw SerializationError("JSON string ended prematurely");
- hexdigits[4] = 0;
-
- std::istringstream tmp_is(hexdigits, std::ios::binary);
- tmp_is >> std::hex >> hexnumber;
- os << (char)hexnumber;
- break;
- }
- default:
- os << c2;
- break;
- }
- } else {
- os << c;
- }
+ tmp.push_back(c);
+ if (was_backslash)
+ was_backslash = false;
+ else if (c == '\\')
+ was_backslash = true;
+ else if (c == '"')
+ break; // found end of string
}
- return os.str();
+ deSerializeJsonString(tmp);
+ return tmp;
}
std::string serializeJsonStringIfNeeded(const std::string &s)
@@ -245,41 +274,21 @@ std::string serializeJsonStringIfNeeded(const std::string &s)
std::string deSerializeJsonStringIfNeeded(std::istream &is)
{
- std::stringstream tmp_os(std::ios_base::binary | std::ios_base::in | std::ios_base::out);
- bool expect_initial_quote = true;
- bool is_json = false;
- bool was_backslash = false;
- for (;;) {
- char c = is.get();
- if (is.eof())
- break;
-
- if (expect_initial_quote && c == '"') {
- tmp_os << c;
- is_json = true;
- } else if(is_json) {
- tmp_os << c;
- if (was_backslash)
- was_backslash = false;
- else if (c == '\\')
- was_backslash = true;
- else if (c == '"')
- break; // Found end of string
- } else {
- if (c == ' ') {
- // Found end of word
- is.unget();
- break;
- }
-
- tmp_os << c;
- }
- expect_initial_quote = false;
- }
- if (is_json) {
- return deSerializeJsonString(tmp_os);
+ // Check for initial quote
+ char c = is.peek();
+ if (is.eof())
+ return "";
+
+ if (c == '"') {
+ // json string: defer to the right implementation
+ return deSerializeJsonString(is);
}
- return tmp_os.str();
+ // not a json string:
+ std::string tmp;
+ std::getline(is, tmp, ' ');
+ if (!is.eof())
+ is.unget(); // we hit a space, put it back
+ return tmp;
}