From 6e8fdc75b2d9fad1e34937af6af7c7ae1c349631 Mon Sep 17 00:00:00 2001 From: "Y. Wang" Date: Fri, 29 Dec 2023 21:12:53 +0100 Subject: poconvert: minor refactor; add unittest poconvert.lua now recognizes fuzzy entries in PO files; these entries are also added to the target file, but they are prefixed with "#" (i.e. marked as comments). --- .build.yml | 6 ++++ advtrains/poconvert.lua | 68 +++++++++++++++++++++++++++------------ advtrains/spec/poconvert_spec.lua | 68 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 21 deletions(-) create mode 100644 advtrains/spec/poconvert_spec.lua diff --git a/.build.yml b/.build.yml index 6c4d697..72dbc89 100644 --- a/.build.yml +++ b/.build.yml @@ -14,3 +14,9 @@ tasks: busted cd ../serialize_lib busted + +- test_po_files : | + cd advtrains/advtrains + for f in po/*.po; do + luajit -e 'require("poconvert").from_string("advtrains", io.input():read("*a"))' < $f + done diff --git a/advtrains/poconvert.lua b/advtrains/poconvert.lua index c7704f6..f44cbd1 100644 --- a/advtrains/poconvert.lua +++ b/advtrains/poconvert.lua @@ -59,14 +59,20 @@ end local function readtoken(str, pos) local _, epos, tok = string.find(str, [[^%s*(%S+)]], pos) - if not epos then + if epos == nil then return nil, pos end return tok, epos+1 end +local function readcomment_add_flags(flags, s) + for flag in string.gmatch(s, ",%s*([^,]+)") do + flags[flag] = true + end +end + local function readcomment_aux(str, pos) - local _, epos, sval = string.find(str, "^\n*#%s*([^\n]*)", pos) + local _, epos, sval = string.find(str, "^\n*#([^\n]*)", pos) if not epos then return nil end @@ -76,10 +82,18 @@ end local function readcomment(str, pos) local st = {} local nxt = pos + local flags = {} while true do local s, npos = readcomment_aux(str, nxt) if not npos then - return table.concat(st, "\n"), nxt + local t = { + comment = table.concat(st, "\n"), + flags = flags, + } + return t, nxt + end + if string.sub(s, 1, 1) == "," then + readcomment_add_flags(flags, s) end table.insert(st, s) nxt = npos @@ -90,24 +104,32 @@ local function readpo(str) local st = {} local pos = 1 while true do - local tok - local _, npos = readcomment(str, pos) - tok, npos = readtoken(str, npos) - if not tok then - return st + local entry, nxt = readcomment(str, pos) + local msglines = 0 + while true do + local tok, npos = readtoken(str, nxt) + if tok == nil or string.sub(tok, 1, 1) == "#" then + break + elseif string.sub(tok, 1, 3) ~= "msg" then + return error("Invalid token: " .. tok) + elseif entry[tok] ~= nil then + break + else + local value, npos = readstring(str, npos) + assert(value ~= nil, "No string provided for " .. tok) + entry[tok] = value + nxt = npos + msglines = msglines+1 + end end - assert(tok == "msgid", "Invalid token: " .. tok) - local orig, tr - orig, npos = readstring(str, npos) - assert(orig ~= nil, "Missing untranslated string") - tok, npos = readtoken(str, npos) - assert(tok == "msgstr", "Invalid token: " .. tok) - tr, npos = readstring(str, npos) - assert(tr ~= nil, "Missing translated string") - if not (orig == "" or tr == "") then - st[orig] = tr + if msglines == 0 then + return st + elseif entry.msgid ~= "" then + assert(entry.msgid ~= nil, "Missing untranslated string") + assert(entry.msgstr ~= nil, "Missing translated string") + table.insert(st, entry) end - pos = npos + pos = nxt end end @@ -121,8 +143,12 @@ end local function convert_po_string(textdomain, str) local st = {string.format("# textdomain: %s", textdomain)} - for k, v in pairs(readpo(str)) do - table.insert(st, ("%s=%s"):format(escape_string(k), escape_string(v))) + for _, entry in ipairs(readpo(str)) do + local line = ("%s=%s"):format(escape_string(entry.msgid), escape_string(entry.msgstr)) + if entry.flags.fuzzy then + line = "#" .. line + end + table.insert(st, line) end return table.concat(st, "\n") end diff --git a/advtrains/spec/poconvert_spec.lua b/advtrains/spec/poconvert_spec.lua new file mode 100644 index 0000000..ef87619 --- /dev/null +++ b/advtrains/spec/poconvert_spec.lua @@ -0,0 +1,68 @@ +package.path = "../?.lua;" .. package.path +advtrains = {} +_G.advtrains = advtrains +local poconvert = require("poconvert") + +describe("PO file converter", function() + it("should convert PO files", function() + assert.equals([[ +# textdomain: foo +foo=bar +baz= +#@=wh\at\@n=@=w\as\@n +multiline@nstrings= +with context?=oder doch nicht]], poconvert.from_string("foo", [[ +msgid "" +msgstr "whatever metadata" + +msgid "foo" +msgstr "bar" + +msgid "baz" +msgstr "" + +#, fuzzy +msgid "=wh\\at\\\n" +msgstr "=w\\as\\\n" + +msgid "multi" +"line\n" +"strings" +msgstr "" + +msgctxt "i18n context" +msgid "with context?" +msgstr "oder doch nicht"]])) + end) + it("should reject invalid tokens", function() + assert.has.errors(function() + poconvert.from_string("", [[ +foo "" +bar ""]]) + end, "Invalid token: foo") + end) + it("should reject entries without a msgstr", function() + assert.has.errors(function() + poconvert.from_string("", [[msgid "foo"]]) + end, "Missing translated string") + end) + it("should reject entries without a msgid", function() + assert.has.errors(function() + poconvert.from_string("", [[msgstr "foo"]]) + end, "Missing untranslated string") + end) + it("should reject entries with improperly enclosed strings", function() + assert.has.errors(function() + poconvert.from_string("", [[ +msgid "foo" +msgstr "bar \]]) + end, "String extends beyond the end of input") + end) + it("should reject incomplete input", function() + assert.has.errors(function() + poconvert.from_string("", [[ +msgid "foo" +msgstr]]) + end, "No string provided for msgstr") + end) +end) -- cgit v1.2.3