From e6b656e93723ea2f0d36c8da39baba4fa099d711 Mon Sep 17 00:00:00 2001 From: orwell96 Date: Wed, 16 Dec 2020 16:33:41 +0100 Subject: Implement basic serialization and file opening --- serialize_lib/init.lua | 92 ++++++++++++++++ serialize_lib/mod.conf | 1 + serialize_lib/readme.md | 7 ++ serialize_lib/serialize.lua | 260 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 360 insertions(+) create mode 100644 serialize_lib/init.lua create mode 100644 serialize_lib/mod.conf create mode 100644 serialize_lib/readme.md create mode 100644 serialize_lib/serialize.lua diff --git a/serialize_lib/init.lua b/serialize_lib/init.lua new file mode 100644 index 0000000..7a1a10b --- /dev/null +++ b/serialize_lib/init.lua @@ -0,0 +1,92 @@ +-- serialize_lib +--[[ + Copyright (C) 2020 Moritz Blei (orwell96) and contributors + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
--[[ (end of the license header) ]]--

serialize_lib = {}

--[[ Configuration table
Whenever asked for a "config", the following table structure is expected:
config = {
	skip_empty_tables = false -- if true, does not store empty tables
		-- On next read, keys that mapped to empty tables resolve to nil
		-- Used by: write_table_to_file
}
Not all functions use all of the parameters, so you can simplify your config sometimes
]]

-- log utils
-- =========
-- Each helper forwards `text` to minetest.log, prefixed with the library tag.

function serialize_lib.log_error(text)
	minetest.log("error", "[serialize_lib] "..text)
end
function serialize_lib.log_warn(text)
	minetest.log("warning", "[serialize_lib] "..text)
end
function serialize_lib.log_info(text)
	minetest.log("action", "[serialize_lib] "..text)
end
function serialize_lib.log_debug(text)
	minetest.log("action", "[serialize_lib](debug) "..text)
end

-- basic serialization/deserialization
-- ===================================

-- dofile() opens relative paths against the process working directory, which
-- is not the mod directory, so the path has to be built from the mod path.
local modpath = minetest.get_modpath(minetest.get_current_modname())
local ser = dofile(modpath.."/serialize.lua")

-- Logs a failed (de)serialization attempt.
-- get_current_modname() only yields a name while mods are being loaded, so the
-- result is guarded against nil; err is stringified because error values are
-- not necessarily strings.
local function log_failure(err)
	local modname = minetest.get_current_modname() or "?"
	serialize_lib.log_error("Mod '"..modname.."': "..tostring(err))
end

-- Opens the passed filename, and returns deserialized table
-- When an error occurs, logs an error and returns false
function serialize_lib.read_table_from_file(filename)
	local succ, ret = pcall(ser.read_from_file, filename)
	if not succ then
		log_failure(ret)
		return false
	end
	-- on success the second pcall result is the deserialized table
	return ret
end

-- Writes table into file
--   root_table: the table to serialize
--   filename:   path of the file to (over)write
--   config:     optional configuration table, see above
-- Returns true on success.
-- When an error occurs, logs an error and returns false
function serialize_lib.write_table_to_file(root_table, filename, config)
	local succ, err = pcall(ser.write_to_file, root_table, filename, config or {})
	if not succ then
		log_failure(err)
	end
	return succ
end

-- Managing files and backups
-- ==========================

-- The plain scheme just overwrites the file in place. This however poses problems when we are interrupted right within
-- the write, so we have incomplete data. So, the following scheme is applied:
-- 1. writes to <filename>.new (if .new already exists, try to complete the moving first)
-- 2.
moves <filename> to <filename>.old, possibly overwriting an existing file (special windows behavior) +3. moves <filename>.new to <filename> + +During loading, we apply the following order of precedence: +1. <filename>.new +2. <filename> +3. <filename>.old + +Normal case: <filename> and <filename>.old exist, loading <filename> +Interrupted during write: .new is damaged, loads last regular state +Interrupted during the move operations: either .new or <filename> represents the latest state +Other corruption: at least the .old state may still be present + +]]-- + + diff --git a/serialize_lib/mod.conf b/serialize_lib/mod.conf new file mode 100644 index 0000000..ac3a1bd --- /dev/null +++ b/serialize_lib/mod.conf @@ -0,0 +1 @@ +name = serialize_lib diff --git a/serialize_lib/readme.md b/serialize_lib/readme.md new file mode 100644 index 0000000..ac364b8 --- /dev/null +++ b/serialize_lib/readme.md @@ -0,0 +1,7 @@ +# serialize_lib +A Minetest mod library for safely storing large amounts of data in on-disk files. +Created out of the need to have a robust data store for advtrains. + +The main purpose is to load and store large Lua table structures into files, without loading everything in memory and exhausting the function constant limit of LuaJIT. + +Also contains various utilities to handle files on disk in a safe manner, retain multiple versions of the same file and so on. \ No newline at end of file diff --git a/serialize_lib/serialize.lua b/serialize_lib/serialize.lua new file mode 100644 index 0000000..8ffd917 --- /dev/null +++ b/serialize_lib/serialize.lua @@ -0,0 +1,260 @@ +-- serialize.lua +-- Lua-conformant library file that has no minetest dependencies +-- Contains the serialization and deserialization routines + +--[[ + +Structure of entry: +[keytype][key]:[valuetype][val] +Types: + B - bool + -> 0=false, 1=true + S - string + -> see below + N - number + -> thing compatible with tonumber() +Table: +[keytype][key]:T +... 
--[[ content is nested in table until the matching
E

example:
LUA_SER v=1             {
Skey:Svalue                 key = "value",
N1:Seins                    [1] = "eins",
B1:T                        [true] = {
Sa:Sb                           a = "b",
Sc:B0                           c = false,
E                           }
E                       }

String representations:
In strings the following characters are escaped by &
'&' -> '&&'
(line break) -> '&n'
(CR) -> '&r'
':' -> '&:'
All other characters are unchanged as they bear no special meaning.
]]

local write_table, value_to_string, escape_chars, table_is_empty

-- Returns true when t contains no entries at all.
function table_is_empty(t)
	return next(t) == nil
end

-- Writes every entry of t to file, one "key:value" line per entry.
-- A nested table is written as "key:T", followed by its own entries and a
-- closing "E" line. The closing "E" of t itself is written by the caller.
--   t:      table to write
--   file:   object providing file:write(str)
--   config: configuration table; config.skip_empty_tables drops empty tables
-- Throws when a key or value has a type that can not be serialized.
function write_table(t, file, config)
	for key, value in pairs(t) do
		local ks = value_to_string(key)
		if type(value) == "table" then
			if not (config.skip_empty_tables and table_is_empty(value)) then
				file:write(ks..":T\n")
				write_table(value, file, config)
				file:write("E\n")
			end
		else
			file:write(ks..":"..value_to_string(value).."\n")
		end
	end
end

-- Converts a boolean, number or string into its "[type][literal]" form.
-- Tables are rejected here: table values are handled by write_table before
-- this function is reached, so a table can only arrive as a key.
-- Throws for tables and for every other unsupported type.
function value_to_string(t)
	local ty = type(t)
	if ty == "table" then
		error("Can not serialize a table in the key position!")
	elseif ty == "boolean" then
		return t and "B1" or "B0"
	elseif ty == "number" then
		return "N"..t
	elseif ty == "string" then
		return "S"..escape_chars(t)
	end
	error("Can not serialize '"..ty.."' type!")
end

-- Characters that would break the line-based "key:value" layout.
local escape_map = {
	["&"] = "&&",
	[":"] = "&:",
	["\n"] = "&n",
	["\r"] = "&r",
}

-- Escapes '&', ':', line feed and carriage return in str (see format
-- description above). Done in a single pass so that the '&' introduced by one
-- replacement is never escaped a second time.
function escape_chars(str)
	-- parentheses drop gsub's second result (the match count)
	return (string.gsub(str, "[&:\n\r]", escape_map))
end

------

local read_table, string_to_value, unescape_chars, split_entry

-- Splits a line at the first ':' that is not part of an escape sequence.
-- Returns the key and value components, or nil when there is no separator.
-- Every '&' is followed by exactly one escaped character, which is skipped,
-- so components that end in an escaped '&' ("&&") are handled correctly.
function split_entry(line)
	local pos = 1
	while true do
		local hit = string.find(line, "[&:]", pos)
		if not hit then
			return nil
		end
		if string.sub(line, hit, hit) == ":" then
			return string.sub(line, 1, hit - 1), string.sub(line, hit + 1)
		end
		-- '&': skip it together with the character it escapes
		pos = hit + 2
	end
end

-- Reads entries from file into t until the "E" line that closes this table.
--   t:    table receiving the entries
--   file: object providing file:read("*l")
-- Throws on EOF before the closing "E" and on lines that can not be parsed.
function read_table(t, file)
	while true do
		local line = file:read("*l")
		if not line then
			error("Unexpected EOF or read error!")
		end

		if line == "E" then
			-- done with this table
			return
		end
		local ks, vs = split_entry(line)
		if not ks or ks == "" or vs == "" then
			error("Unable to parse line: '"..line.."'!")
		end
		local kv = string_to_value(ks)
		local vv, is_table = string_to_value(vs, true)
		if is_table then
			read_table(vv, file)
		end
		-- put read value in table
		t[kv] = vv
	end
end

-- Parses a "[type][literal]" component.
--   str:         the component
--   table_allow: when true, "T" yields a fresh table (value position only)
-- returns: value, is_table
-- Throws on unknown types and malformed literals.
function string_to_value(str, table_allow)
	local first = string.sub(str, 1, 1)
	local rest = string.sub(str, 2)
	if first == "T" then
		if table_allow then
			return {}, true
		else
			error("Table not allowed in key component!")
		end
	elseif first == "N" then
		local num = tonumber(rest)
		if num then
			return num
		else
			error("Unable to parse number: '"..rest.."'!")
		end
	elseif first == "B" then
		if rest == "0" then
			return false
		elseif rest == "1" then
			return true
		else
			error("Unable to parse boolean: '"..rest.."'!")
		end
	elseif first == "S" then
		return unescape_chars(rest)
	else
		error("Unknown literal type '"..first.."' for literal '"..str.."'!")
	end
end

-- Maps the character following '&' back to the character it stands for.
local unescape_map = {
	["&"] = "&",
	[":"] = ":",
	n = "\n",
	r = "\r",
}

-- Reverses escape_chars.
-- Works in a single left-to-right pass: replacing the sequences one after
-- another would misread "&&n" (an escaped '&' followed by 'n') as '&' plus a
-- line break. Unknown sequences are left unchanged.
function unescape_chars(str)
	return (string.gsub(str, "&(.)", unescape_map))
end

------

--[[
config = {
	skip_empty_tables = false -- if true, does not store empty tables
		-- On next read, keys that mapped to empty tables resolve to nil
}
]]

-- Writes the passed table into the passed file descriptor, and closes the file afterwards
-- The file is closed even when serialization fails; the error is re-thrown afterwards.
-- config is optional and defaults to an empty table.
local function write_to_fd(root_table, file, config)
	config = config or {}
	local succ, err = pcall(function()
		file:write("LUA_SER v=1\n")
		write_table(root_table, file, config)
		file:write("E\nEND_SER\n")
	end)
	file:close()
	if not succ then
		-- level 0: keep the original message without prepending a new position
		error(err, 0)
	end
end

-- Reads the file contents from the passed file descriptor and returns the table on success
-- Throws errors when something is wrong.
-- config: see above
-- The file is closed in every case, also when parsing fails; the error is
-- re-thrown after closing.
local function read_from_fd(file)
	local succ, ret = pcall(function()
		local first_line = file:read("*l")
		if first_line ~= "LUA_SER v=1" then
			-- tostring: the line is nil when the file is empty
			error("Expected header, got '"..tostring(first_line).."' instead!")
		end
		local t = {}
		read_table(t, file)
		local last_line = file:read("*l")
		if last_line ~= "END_SER" then
			-- tostring: the line is nil when the file ends right after the table
			error("Missing END_SER, got '"..tostring(last_line).."' instead!")
		end
		return t
	end)
	file:close()
	if not succ then
		-- level 0: keep the original message without prepending a new position
		error(ret, 0)
	end
	return ret
end

-- Opens the passed filename and serializes root_table into it
-- config: see above (optional, defaults to an empty table)
-- Returns true on success, throws when the file can not be opened or written.
-- Kept local so that the generic name does not leak into the global
-- environment; it is exported through the table returned at the end of the file.
local function write_to_file(root_table, filename, config)
	-- try opening the file
	local file, err = io.open(filename, "w")
	if not file then
		error("Failed opening file '"..filename.."' for write: "..err)
	end

	write_to_fd(root_table, file, config or {})
	return true
end

-- Opens the passed filename, and returns its deserialized contents
-- Throws when the file can not be opened or its contents can not be parsed.
-- Kept local for the same reason as write_to_file.
local function read_from_file(filename)
	-- try opening the file
	local file, err = io.open(filename, "r")
	if not file then
		error("Failed opening file '"..filename.."' for read: "..err)
	end

	return read_from_fd(file)
end

--[[ simple unit test
local testtable = {
	key = "value",
	[1] = "eins",
	[true] = {
		a = "b",
		c = false,
	},
	["es:cape1"] = "foo:bar",
	["es&ca\npe2"] = "baz&bam\nbim",
	["es&&ca&\npe3"] = "baz&&bam&\nbim",
	["es&:cape4"] = "foo\n:bar"
}
local config = {}
--write_to_file(testtable, "test_out", config)
local t = read_from_file("test_out")
write_to_file(t, "test_out_2", config)
local t2 = read_from_file("test_out_2")
write_to_file(t2, "test_out_3", config)

-- test_out_2 and test_out_3 should be equal

--]]


return {
	read_from_fd = read_from_fd,
	write_to_fd = write_to_fd,
	read_from_file = read_from_file,
	write_to_file = write_to_file,
}
-- cgit v1.2.3