--
-- Copyright (c) 2021-2025 Zeping Lee
-- Released under the MIT license.
-- Repository: https://github.com/zepinglee/citeproc-lua
--

local output_module = {}

local lpeg = require("lpeg")
local uni_utf8
local unicode
local dom
local ir_node
local util

local using_luatex, kpse = pcall(require, "kpse")
if using_luatex then
  uni_utf8 = require("unicode").utf8
  unicode = require("citeproc-unicode")
  dom = require("luaxml-domobject")
  ir_node = require("citeproc-ir-node")
  util = require("citeproc-util")
else
  uni_utf8 = require("lua-utf8")
  unicode = require("citeproc.unicode")
  dom = require("citeproc.luaxml.domobject")
  ir_node = require("citeproc.ir-node")
  util = require("citeproc.util")
end

local GroupVar = ir_node.GroupVar


---Quotation marks used when rendering quoted text.
---The prototype itself carries the default marks and
---`punctuation_in_quote = false`.
---@class LocalizedQuotes
local LocalizedQuotes = {
  outer_open = util.unicode["left double quotation mark"],
  outer_close = util.unicode["right double quotation mark"],
  inner_open = util.unicode["left single quotation mark"],
  inner_close = util.unicode["right single quotation mark"],
  punctuation_in_quote = false,
}

---Build a quote set; every mark that is omitted falls back to the default
---curly quotation mark. When `punctuation_in_quote` is nil the field is left
---unset on the instance, so lookups resolve to the prototype value.
function LocalizedQuotes:new(outer_open, outer_close, inner_open, inner_close, punctuation_in_quote)
  local marks = util.unicode
  self.__index = self
  return setmetatable({
    outer_open = outer_open or marks["left double quotation mark"],
    outer_close = outer_close or marks["right double quotation mark"],
    inner_open = inner_open or marks["left single quotation mark"],
    inner_close = inner_close or marks["right single quotation mark"],
    punctuation_in_quote = punctuation_in_quote,
  }, self)
end


-- Inspired by:
-- https://github.com/zotero/citeproc-rs/blob/master/crates/io/src/output/markup.rs#L67
-- https://hackage.haskell.org/package/pandoc-types-1.22.2.1/docs/Text-Pandoc-Definition.html

---@class InlineElement
---@field _type string
---@field _base_class string
---@field value string?
---@field inlines InlineElement[]?
local InlineElement = {
  _type = "InlineElement",
  _base_class = "InlineElement",
  value = nil,
  inlines = nil,
}

---Create a subclass prototype named `class_name` whose lookups fall back to
---the receiver.
---@param class_name string
---@return table
function InlineElement:derive(class_name)
  local o = {
    _type = class_name,
  }
  -- self[class_name] = o
  setmetatable(o, self)
  self.__index = self
  -- Instances of the new class look their methods up on the class itself.
  o.__index = o
  return o
end

---Create an instance of the receiver class holding `inlines`.
---@param inlines InlineElement[]?
---@return InlineElement
function InlineElement:new(inlines)
  local o = {
    inlines = inlines,
    _type = self._type,
  }
  setmetatable(o, self)
  return o
end

---Render the element tree as a human-readable string for debugging.
---@param level integer? nesting depth; the top level (0) starts with a newline
---@return string
function InlineElement:_debug(level)
  level = level or 0
  local parts = {}
  if level == 0 then
    parts[#parts + 1] = "\n"
  end
  parts[#parts + 1] = self._type

  if self.formatting then
    parts[#parts + 1] = " ["
    for attr, value in pairs(self.formatting) do
      -- Formatting values are not always strings (e.g. `strike-through` is
      -- stored as `true`), so convert explicitly instead of relying on `..`.
      parts[#parts + 1] = attr .. '="' .. tostring(value) .. '"'
    end
    parts[#parts + 1] = "] "
  end
  if self.is_inner then
    parts[#parts + 1] = " [inner quotes]"
  end

  parts[#parts + 1] = "("
  if self.value then
    parts[#parts + 1] = '"' .. self.value .. '"'
  elseif self.inlines then
    for _, inline in ipairs(self.inlines) do
      parts[#parts + 1] = "\n" .. string.rep(" ", level + 1) .. inline:_debug(level + 1) .. ", "
    end
    parts[#parts + 1] = "\n" .. string.rep(" ", level)
  end
  parts[#parts + 1] = ")"

  return table.concat(parts)
end


---@class PlainText: InlineElement
local PlainText = InlineElement:derive("PlainText")

---@param value string
---@return PlainText
function PlainText:new(value)
  local o = InlineElement.new(self)
  o.value = value
  setmetatable(o, self)
  return o
end


---@class Formatted: InlineElement
---@field formatting table?
local Formatted = InlineElement:derive("Formatted")

---@param inlines InlineElement[]
---@param formatting table?
---@return Formatted
function Formatted:new(inlines, formatting)
  local o = InlineElement.new(self)
  o.inlines = inlines
  o.formatting = formatting
  setmetatable(o, self)
  return o
end


---@class Micro: InlineElement
local Micro = InlineElement:derive("Micro")

-- This is how we can flip-flop only user-supplied styling.
-- Inside this is parsed micro html

---Wrap parsed micro-HTML inlines.
---@param inlines InlineElement[]
---@return Micro
function Micro:new(inlines)
  local micro = InlineElement.new(self, inlines)
  return setmetatable(micro, self)
end


---@class Quoted: InlineElement
---@field is_inner boolean
---@field quotes LocalizedQuotes
local Quoted = InlineElement:derive("Quoted")

---Quoted inlines; a default `LocalizedQuotes` is created when none is given.
---@param inlines InlineElement[]
---@param localized_quotes LocalizedQuotes?
---@param is_inner boolean?
---@return Quoted
function Quoted:new(inlines, localized_quotes, is_inner)
  local quoted = InlineElement.new(self, inlines)
  quoted.is_inner = is_inner or false
  quoted.quotes = localized_quotes or LocalizedQuotes:new()
  return setmetatable(quoted, self)
end


---@class Code: InlineElement
local Code = InlineElement:derive("Code")

---@param value string
---@return Code
function Code:new(value)
  local code = InlineElement.new(self)
  code.value = value
  return setmetatable(code, self)
end


---@class MathML: InlineElement
local MathML = InlineElement:derive("MathML")

---@param value string
---@return MathML
function MathML:new(value)
  local math_ml = InlineElement.new(self)
  math_ml.value = value
  return setmetatable(math_ml, self)
end


---@class MathTeX: InlineElement
local MathTeX = InlineElement:derive("MathTeX")

---@param value string
---@return MathTeX
function MathTeX:new(value)
  local math_tex = InlineElement.new(self)
  math_tex.value = value
  return setmetatable(math_tex, self)
end


---@class NoCase: InlineElement
local NoCase = InlineElement:derive("NoCase")

---@param inlines InlineElement[]
---@return NoCase
function NoCase:new(inlines)
  local no_case = InlineElement.new(self, inlines)
  return setmetatable(no_case, self)
end


---@class NoDecor: InlineElement
local NoDecor = InlineElement:derive("NoDecor")

---@param inlines InlineElement[]
---@return NoDecor
function NoDecor:new(inlines)
  local no_decor = InlineElement.new(self, inlines)
  return setmetatable(no_decor, self)
end


---@class Linked: InlineElement
---@field href string
local Linked = InlineElement:derive("Linked")

---@param value string
---@param href string
---@return Linked
function Linked:new(value, href)
  local link = InlineElement.new(self)
  link.value = value
  link.href = href
  return setmetatable(link, self)
end


---@class Div: InlineElement
---@field div table?
local Div = InlineElement:derive("Div")

---The `display` argument is stored in the `div` field.
---@param inlines InlineElement[]
---@param display table?
---@return Div
function Div:new(inlines, display)
  local block = InlineElement.new(self, inlines)
  block.div = display
  return setmetatable(block, self)
end


---@class CiteInline: InlineElement
---@field cite_item CitationItem
local CiteInline = InlineElement:derive("CiteInline")

---@param inlines InlineElement[]
---@param cite_item CitationItem
---@return CiteInline
function CiteInline:new(inlines, cite_item)
  local cite = InlineElement.new(self, inlines)
  cite.cite_item = cite_item
  return setmetatable(cite, self)
end


---@class UndefinedCite: InlineElement
---@field cite_item CitationItem
local UndefinedCite = InlineElement:derive("UndefinedCite")

---@param inlines InlineElement[]
---@param cite_item CitationItem
---@return UndefinedCite
function UndefinedCite:new(inlines, cite_item)
  local cite = InlineElement.new(self, inlines)
  cite.cite_item = cite_item
  return setmetatable(cite, self)
end


---@param text string
---@param context Context?
---@param is_external boolean?
---@return InlineElement[]
function InlineElement:parse(text, context, is_external)
  -- Dispatch on the Lua type of `text`: tables are CSL rich text, strings
  -- may carry HTML-like formatting tags, numbers become plain text.
  local text_type = type(text)
  local inlines
  if text_type == "table" then
    -- CSL rich text
    inlines = self:parse_csl_rich_text(text)
  elseif text_type == "string" then
    -- String with HTML-like formatting tags
    -- util.debug(text)
    inlines = self:parse_html_tags(text, context, is_external)
    -- util.debug(inlines)
  elseif text_type == "number" then
    inlines = {PlainText:new(tostring(text))}
  else
    util.error("Invalid text type")
  end
  return inlines
end

---Convert CSL rich text into inline elements. `text` is either a plain
---string or a list whose items are strings or `{format = content}` tables.
---Items with an unrecognized format (or of another type) produce no inline.
---@param text string | (string | table)[]
---@return InlineElement[]
function InlineElement:parse_csl_rich_text(text)
  -- Example: [
  --   "A title with a",
  --   {
  --     "quote": "quoted string."
  --   }
  -- ]
  local inlines = {}
  local text_type = type(text)

  if text_type == "string" then
    table.insert(inlines, PlainText:new(text))
    return inlines
  end
  if text_type ~= "table" then
    util.error("Invalid text type")
    return inlines
  end

  for _, subtext in ipairs(text) do
    local subtext_type = type(subtext)
    local inline
    if subtext_type == "string" then
      inline = PlainText:new(subtext)
    elseif subtext_type == "table" then
      local format
      local content
      for format_, content_ in pairs(subtext) do
        format = format_
        content = content_
      end

      -- Recursively parse `content` and wrap it in a single formatting
      -- attribute; a fresh attribute table is created for every element.
      local function formatted(attribute, value)
        return Formatted:new(self:parse_csl_rich_text(content), {[attribute] = value})
      end

      if format == "bold" then
        inline = formatted("font-weight", "bold")
      elseif format == "italic" then
        inline = formatted("font-style", "italic")
      elseif format == "sc" then
        inline = formatted("font-variant", "small-caps")
      elseif format == "strike" then
        inline = formatted("strike-through", true)
      elseif format == "sub" then
        -- Same `vertical-align` values that `make_inline_from_tag` produces.
        inline = formatted("vertical-align", "sub")
      elseif format == "sup" then
        inline = formatted("vertical-align", "sup")
      elseif format == "code" or format == "math-ml" or format == "math-tex" then
        -- Verbatim content: must be a string and is never parsed further.
        if type(content) ~= "string" then
          util.error("Invalid rich text content.")
        end
        if format == "code" then
          inline = Code:new(content)
        elseif format == "math-ml" then
          inline = MathML:new(content)
        else
          inline = MathTeX:new(content)
        end
      elseif format == "preserve" then
        inline = NoCase:new(self:parse_csl_rich_text(content))
      elseif format == "quote" then
        inline = Quoted:new(self:parse_csl_rich_text(content))
      end
    end
    if inline then
      table.insert(inlines, inline)
    end
  end

  return inlines
end


local P = lpeg.P
local Ct = lpeg.Ct
local Cp = lpeg.Cp

-- Lua's regex doesn't support groups and thus we have to implement the same
-- logic with `lpeg`.
-- NOTE(review): every tag literal in the patterns and tables below is empty
-- (or a bare newline) in this copy, and one alternative has unbalanced
-- parentheses. This looks like angle-bracket tags were stripped from the
-- file; restore the literals from version control before relying on this
-- code. They are intentionally left untouched here.
local code_pattern =
  Ct(Cp() * P("") * Cp()) * ((1 - P("")) ^ 0) * Ct(Cp() * P("") * Cp())
  + Ct(Cp() * P("")) ^ 0) * Ct(Cp() * P("") * Cp())
  + Ct(Cp() * P("
") * Cp()) * ((1 - P("
")) ^ 0) * Ct(Cp() * P("") * Cp())
  + Ct(Cp() * P("") * Cp()) * ((1 - P("")) ^ 0) * Ct(Cp() * P("") * Cp())
  + Ct(Cp() * P("") * Cp()) * ((1 - P("")) ^ 0) * Ct(Cp() * P("") * Cp())

local basic_tag_pattern = P ''
  + P ''
  + P ''
  + P ''
  + P ''
  + P ''
  + P ''
  + P ''
  + P ' "'
  + P " '"
  + P '("'
  + P "('"
  + P "“"
  + P "‘"
  + P ''
  + P ''
  + P ''
  + P ''
  + P ''
  + P ''
  + P '"'
  + P "'"
  + P "”"
  + P "’"

-- Captures the {start, stop} positions of every tag found in a string.
local default_tag_pattern = Ct((code_pattern + Ct(Cp() * basic_tag_pattern * Cp()) + P(1)) ^ 0)

-- Maps an opening tag to its closing tag and whether it is a quotation mark.
local default_openers_info = {
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [' "'] = { closer = '"', quotes = true, },
  [" '"] = { closer = "'", quotes = true, },
  ["“"] = { closer = "”", quotes = true, },
  ["‘"] = { closer = "’", quotes = true, },
  [''] = { closer = "", quotes = false, },
  ['", quotes = false, }, ['
'] = {
    closer = "
", quotes = false, },
  [''] = { closer = "", quotes = false, },
  [''] = { closer = "", quotes = false, },
}

-- Wraps `str` in single quotes. The replacement "\'" is the same Lua string
-- as "'", so the substitution leaves `str` unchanged.
local function _quoted(str)
  str = string.gsub(str, "'", "\'")
  return string.format("'%s'", str)
end

---Build and cache the tag pattern and opener table for `locale`, extending
---the defaults with the localized quotation marks from `context`. Does
---nothing when an entry for `locale` already exists.
---@param locale string
---@param context Context
local function make_locale_tag_info(locale, context)
  if context.engine.locale_tags_info_dict[locale] then
    return
  end
  local localed_quotes = context:get_localized_quotes()
  local tag_pattern = basic_tag_pattern
  local openers_info = util.deep_copy(default_openers_info)
  -- NOTE(review): `P` matches its string argument literally, so the
  -- alternatives added through `_quoted` match the quotation mark wrapped in
  -- apostrophes rather than the bare mark — confirm this is intended.
  if localed_quotes.outer_open and localed_quotes.outer_close then
    tag_pattern = tag_pattern + P(_quoted(localed_quotes.outer_open))
    tag_pattern = tag_pattern + P(_quoted(localed_quotes.outer_close))
    openers_info[localed_quotes.outer_open] = {
      closer = localed_quotes.outer_close,
      quotes = true,
      inner = false
    }
  end
  if localed_quotes.inner_open and localed_quotes.inner_close then
    tag_pattern = tag_pattern + P(_quoted(localed_quotes.inner_open))
    tag_pattern = tag_pattern + P(_quoted(localed_quotes.inner_close))
    openers_info[localed_quotes.inner_open] = {
      closer = localed_quotes.inner_close,
      quotes = true,
      inner = true
    }
  end
  context.engine.locale_tags_info_dict[locale] = {
    tag_pattern = Ct((code_pattern + Ct(Cp() * tag_pattern * Cp()) + P(1)) ^ 0),
    openers_info = openers_info,
  }
end

-- Maps each straight-quote opener to the other straight-quote opener.
local straight_quotes_flip = {
  [" '"] = ' "',
  [' "'] = " '",
};


---@param str any
---@param context Context?
---@return string[]
---@return string[]
local function split_tags_and_strings(str, context)
  -- Splits `str` into the tags matched by the tag pattern and the text
  -- between them. Returns `tags, strings`, where `strings[i]` is the text in
  -- front of `tags[i]` and the final entry of `strings` is the text after
  -- the last tag (so `strings` has one more entry than `tags`).
  local tags = {}
  local strings = {}

  -- NOTE(review): the three patterns below look truncated in this copy (the
  -- replacement strings reference captures the patterns no longer define);
  -- restore them from version control. They are left untouched here.
  str = string.gsub(str, '(]*(>)', '%1 %2%3;"%4');
  str = string.gsub(str, '(]*(>)', "%1 %2%3");
  str = string.gsub(str, '(]*(>)', "%1 %2%3");

  local tag_pattern = default_tag_pattern
  local openers_info = default_openers_info
  -- Only the pattern is taken from the per-locale cache; `openers_info`
  -- stays the default table.
  -- NOTE(review): presumably the cache entry for `context.lang` has already
  -- been created by the caller; indexing fails otherwise — confirm.
  if context and context.lang then
    tag_pattern = context.engine.locale_tags_info_dict[context.lang].tag_pattern
  end

  local tag_positions_list = lpeg.match(tag_pattern, str)
  if not tag_positions_list then
    error('Pattern not match')
  end

  -- Each tuple holds the start position of a tag and the position just past
  -- its end; `stop` tracks where the previous tag ended.
  local start = 1
  local stop = 1
  local new_stop = 1
  for _, postion_tuple in ipairs(tag_positions_list) do
    start, new_stop = table.unpack(postion_tuple)
    table.insert(strings, string.sub(str, stop, start - 1))
    table.insert(tags, string.sub(str, start, new_stop - 1))
    stop = new_stop
  end
  table.insert(strings, string.sub(str, stop, -1))

  for i, tag in ipairs(tags) do
    if string.match(tag, "^.['\"]$") then
      -- Tag made of one leading byte plus a straight quote (e.g. `("`): the
      -- leading byte goes back to the preceding text and the tag is
      -- normalized to a space followed by the quote.
      strings[i] = strings[i] .. string.sub(tag, 1, 1)
      tags[i] = " " .. string.sub(tag, 2)
    elseif (tag == "'" or tag == '"') and strings[i] == "" and (i == 1 or openers_info[tags[i - 1]]) then
      -- A bare straight quote with no text before it, at the very start or
      -- right after an opening tag, is turned into the opening-quote form.
      -- See `bugreports_NoCaseEscape.txt`.
      -- '"PIAAC-Longitudinal (PIAAC-L) 2015"'
      -- \"PIAAC-Longitudinal (PIAAC-Lx) 2015\"
      tags[i] = " " .. tag
    end
  end
  return tags, strings
end


---@param tag string
---@param str string
---@return string?
local function _apostrophe_force(tag, str)
  -- Decides whether a quote-like `tag` followed by the text `str` is an
  -- apostrophe. Returns the right single quotation mark if so, otherwise nil.
  if tag == "'" or tag == "’" then
    -- Treated as an apostrophe when the following text is non-empty and does
    -- not start with one of `, . ? : ;` or a space.
    if str ~= "" and string.match(str, "^[^,.?:; ]") then
      return util.unicode["right single quotation mark"]
    end
  -- Space-prefixed straight quote followed by whitespace. The `tag == "’"`
  -- alternative cannot be true here because the first branch already
  -- handles that tag.
  elseif (tag == " '" or tag == "’") and str ~= "" and string.match(str, "^%s") then
    return util.unicode["right single quotation mark"]
  end
  return nil
end

---Marks `quote` as the outer quote form and its straight-quote counterpart
---(looked up in `straight_quotes_flip`) as the inner form, mutating
---`openers_info` in place.
---@param quote string
---@param openers_info table
local function set_outer_quote_form(quote, openers_info)
  openers_info[quote].inner = false;
  openers_info[straight_quotes_flip[quote]].inner = true;
end

---@param tag string
---@param inlines InlineElement[]
---@param openers_info table
---@param context Context
---@return InlineElement
local function make_inline_from_tag(tag, inlines, openers_info, context)
  -- NOTE(review): the tag literals compared against below are empty in this
  -- copy; they look like stripped angle-bracket tags — restore from version
  -- control. The function continues past this point.
  if tag == '' then
    return NoCase:new(inlines)
  elseif tag == '' then
    return NoDecor:new(inlines)
  elseif tag == '' then
    return Formatted:new(inlines, {["font-variant"] = "small-caps"})
  elseif tag == '' then
    return Formatted:new(inlines, {["font-variant"] = "small-caps"})
  elseif tag == '' then
    return Formatted:new(inlines, {["font-style"] = "italic"})
  elseif tag == '' then
    return Formatted:new(inlines, {["font-weight"] = "bold"})
  elseif tag == '' then
    return Formatted:new(inlines, {["vertical-align"] = "sup"})
  elseif tag == '' then
    return Formatted:new(inlines, {["vertical-align"] = "sub"})
  elseif openers_info[tag] and openers_info[tag].quotes then
    local localized_quotes = context:get_localized_quotes()
    return Quoted:new(inlines, localized_quotes, openers_info[tag].inner)
  elseif tag == "" or tag == "