--
-- Copyright (c) 2021-2025 Zeping Lee
-- Released under the MIT license.
-- Repository: https://github.com/zepinglee/citeproc-lua
--
local output_module = {}
local lpeg = require("lpeg")
local uni_utf8
local unicode
local dom
local ir_node
local util
local using_luatex, kpse = pcall(require, "kpse")
if using_luatex then
uni_utf8 = require("unicode").utf8
unicode = require("citeproc-unicode")
dom = require("luaxml-domobject")
ir_node = require("citeproc-ir-node")
util = require("citeproc-util")
else
uni_utf8 = require("lua-utf8")
unicode = require("citeproc.unicode")
dom = require("citeproc.luaxml.domobject")
ir_node = require("citeproc.ir-node")
util = require("citeproc.util")
end
local GroupVar = ir_node.GroupVar
---Quotation marks used for quoted text. The table itself carries the default
---marks and serves as the prototype of objects made by `LocalizedQuotes:new`.
---@class LocalizedQuotes
---@field outer_open string
---@field outer_close string
---@field inner_open string
---@field inner_close string
---@field punctuation_in_quote boolean
local LocalizedQuotes = {
  outer_open = util.unicode["left double quotation mark"],
  outer_close = util.unicode["right double quotation mark"],
  inner_open = util.unicode["left single quotation mark"],
  inner_close = util.unicode["right single quotation mark"],
  punctuation_in_quote = false,
}

---Build a set of quotation marks. Every mark that is omitted falls back to the
---matching `util.unicode` entry (double marks for the outer pair, single marks
---for the inner pair).
---@param outer_open string?
---@param outer_close string?
---@param inner_open string?
---@param inner_close string?
---@param punctuation_in_quote boolean? When nil, reads fall through to the value stored on the receiver.
---@return LocalizedQuotes
function LocalizedQuotes:new(outer_open, outer_close, inner_open, inner_close, punctuation_in_quote)
  local marks = util.unicode
  local quotes = setmetatable({
    outer_open = outer_open or marks["left double quotation mark"],
    outer_close = outer_close or marks["right double quotation mark"],
    inner_open = inner_open or marks["left single quotation mark"],
    inner_close = inner_close or marks["right single quotation mark"],
    punctuation_in_quote = punctuation_in_quote,
  }, self)
  -- Missing fields on the instance are looked up on the receiver.
  self.__index = self
  return quotes
end
-- Inspired by:
-- https://github.com/zotero/citeproc-rs/blob/master/crates/io/src/output/markup.rs#L67
-- https://hackage.haskell.org/package/pandoc-types-1.22.2.1/docs/Text-Pandoc-Definition.html
---Base prototype shared by every inline output element.
---An element carries either a string `value` or a list of child `inlines`;
---both are unset on the prototype itself.
---@class InlineElement
---@field _type string
---@field _base_class string
---@field value string?
---@field inlines InlineElement[]?
local InlineElement = {
  _base_class = "InlineElement",
  _type = "InlineElement",
}
---Create a new element class that inherits from the receiver.
---@param class_name string Stored as the `_type` of the new class.
---@return table # The new class table. Its missing fields resolve through the receiver, and it is set up as its own `__index` so it can be used as the metatable of its instances.
function InlineElement:derive(class_name)
  local derived = setmetatable({_type = class_name}, self)
  -- The receiver has to be an `__index` target for the derived class ...
  self.__index = self
  -- ... and the derived class has to be one for its own instances.
  derived.__index = derived
  return derived
end
---Create an instance of the receiving class.
---The receiver becomes the instance's metatable; `__index` is not touched
---here, so the receiver must already provide it (`derive` sets it up).
---@param inlines InlineElement[]? Child elements of the new instance.
---@return InlineElement
function InlineElement:new(inlines)
  return setmetatable({
    _type = self._type,
    inlines = inlines,
  }, self)
end
---Render this element and its descendants as an indented, human-readable
---string for debugging.
---@param level integer? Nesting depth used for indentation (default 0). At depth 0 the result starts with a newline.
---@return string
function InlineElement:_debug(level)
  level = level or 0
  -- Collect the pieces and join once instead of re-concatenating a growing string.
  local parts = {}
  if level == 0 then
    parts[#parts + 1] = "\n"
  end
  parts[#parts + 1] = self._type
  if self.formatting then
    parts[#parts + 1] = " ["
    for attr, value in pairs(self.formatting) do
      -- Formatting values are not always strings (for example
      -- `["strike-through"] = true`), and `..` raises an error on booleans,
      -- so convert explicitly.
      parts[#parts + 1] = tostring(attr) .. '="' .. tostring(value) .. '"'
    end
    parts[#parts + 1] = "] "
  end
  if self.is_inner then
    parts[#parts + 1] = " [inner quotes]"
  end
  parts[#parts + 1] = "("
  if self.value then
    parts[#parts + 1] = '"' .. tostring(self.value) .. '"'
  elseif self.inlines then
    local child_indent = string.rep(" ", level + 1)
    for _, inline in ipairs(self.inlines) do
      parts[#parts + 1] = "\n" .. child_indent .. inline:_debug(level + 1) .. ", "
    end
    parts[#parts + 1] = "\n" .. string.rep(" ", level)
  end
  parts[#parts + 1] = ")"
  return table.concat(parts)
end
---Inline element whose content is the string stored in `value`.
---@class PlainText: InlineElement
local PlainText = InlineElement:derive("PlainText")

---@param value string Text content of the element.
---@return PlainText
function PlainText:new(value)
  local element = InlineElement.new(self)
  element.value = value
  return setmetatable(element, self)
end
---Inline element that attaches a table of formatting attributes to its
---children.
---@class Formatted: InlineElement
---@field formatting table?
local Formatted = InlineElement:derive("Formatted")

---@param inlines InlineElement[] Child elements.
---@param formatting table? Formatting attributes, e.g. `{["font-style"] = "italic"}`.
---@return Formatted
function Formatted:new(inlines, formatting)
  local element = InlineElement.new(self, inlines)
  element.formatting = formatting
  return setmetatable(element, self)
end
---Container that makes it possible to flip-flop only user-supplied styling;
---its children are the parsed micro HTML.
---@class Micro: InlineElement
local Micro = InlineElement:derive("Micro")

---@param inlines InlineElement[] Child elements.
---@return Micro
function Micro:new(inlines)
  return setmetatable(InlineElement.new(self, inlines), self)
end
---Inline element for quoted content together with the quotation marks to use.
---@class Quoted: InlineElement
---@field is_inner boolean
---@field quotes LocalizedQuotes
local Quoted = InlineElement:derive("Quoted")

---@param inlines InlineElement[] Child elements.
---@param localized_quotes LocalizedQuotes? Marks to use; a default `LocalizedQuotes` is created when omitted.
---@param is_inner boolean? Stored as `false` when omitted.
---@return Quoted
function Quoted:new(inlines, localized_quotes, is_inner)
  local element = InlineElement.new(self, inlines)
  element.is_inner = is_inner or false
  -- The default quote set is only constructed when none was passed in.
  element.quotes = localized_quotes or LocalizedQuotes:new()
  return setmetatable(element, self)
end
---Inline element whose string `value` comes from the `code` rich-text format.
---@class Code: InlineElement
local Code = InlineElement:derive("Code")

---@param value string Content of the element.
---@return Code
function Code:new(value)
  local element = InlineElement.new(self)
  element.value = value
  return setmetatable(element, self)
end
---Inline element with a string `value` (presumably MathML markup, going by
---the class name).
---@class MathML: InlineElement
local MathML = InlineElement:derive("MathML")

---@param value string Content of the element.
---@return MathML
function MathML:new(value)
  local element = InlineElement.new(self)
  element.value = value
  return setmetatable(element, self)
end
---Inline element with a string `value` (presumably TeX math source, going by
---the class name).
---@class MathTeX: InlineElement
local MathTeX = InlineElement:derive("MathTeX")

---@param value string Content of the element.
---@return MathTeX
function MathTeX:new(value)
  local element = InlineElement.new(self)
  element.value = value
  return setmetatable(element, self)
end
---Wrapper around child inlines; `parse_csl_rich_text` creates it for the
---`preserve` format.
---@class NoCase: InlineElement
local NoCase = InlineElement:derive("NoCase")

---@param inlines InlineElement[] Child elements.
---@return NoCase
function NoCase:new(inlines)
  return setmetatable(InlineElement.new(self, inlines), self)
end
---Wrapper around child inlines (presumably content excluded from text
---decoration; confirm against the renderer).
---@class NoDecor: InlineElement
local NoDecor = InlineElement:derive("NoDecor")

---@param inlines InlineElement[] Child elements.
---@return NoDecor
function NoDecor:new(inlines)
  return setmetatable(InlineElement.new(self, inlines), self)
end
---Inline element pairing a string `value` with a link target `href`.
---@class Linked: InlineElement
---@field href string
local Linked = InlineElement:derive("Linked")

---@param value string Content of the element.
---@param href string Link target.
---@return Linked
function Linked:new(value, href)
  local element = InlineElement.new(self)
  element.value = value
  element.href = href
  return setmetatable(element, self)
end
---Inline element that groups children and keeps the given display setting in
---its `div` field.
---@class Div: InlineElement
---@field div table?
local Div = InlineElement:derive("Div")

---@param inlines InlineElement[] Child elements.
---@param display table? Stored as `div` on the new element.
---@return Div
function Div:new(inlines, display)
  local element = InlineElement.new(self, inlines)
  element.div = display
  return setmetatable(element, self)
end
---Inline element that ties its children to the citation item they belong to.
---@class CiteInline: InlineElement
---@field cite_item CitationItem
local CiteInline = InlineElement:derive("CiteInline")

---@param inlines InlineElement[] Child elements.
---@param cite_item CitationItem Citation item stored on the element.
---@return CiteInline
function CiteInline:new(inlines, cite_item)
  local element = InlineElement.new(self, inlines)
  element.cite_item = cite_item
  return setmetatable(element, self)
end
---Inline element with the same shape as `CiteInline` (children plus a
---`cite_item`), presumably used for items that could not be resolved.
---@class UndefinedCite: InlineElement
---@field cite_item CitationItem
local UndefinedCite = InlineElement:derive("UndefinedCite")

---@param inlines InlineElement[] Child elements.
---@param cite_item CitationItem Citation item stored on the element.
---@return UndefinedCite
function UndefinedCite:new(inlines, cite_item)
  local element = InlineElement.new(self, inlines)
  element.cite_item = cite_item
  return setmetatable(element, self)
end
---Turn a field value into a list of inline elements, choosing the parser by
---the Lua type of `text`:
---  * table  -> CSL rich text (`parse_csl_rich_text`)
---  * string -> string with HTML-like formatting tags (`parse_html_tags`)
---  * number -> a single `PlainText` holding `tostring(text)`
---Any other type is reported through `util.error`.
---@param text string | number | (string | table)[]
---@param context Context? Forwarded to `parse_html_tags`.
---@param is_external boolean? Forwarded to `parse_html_tags`.
---@return InlineElement[]
function InlineElement:parse(text, context, is_external)
  local kind = type(text)
  -- The parser calls are parenthesized so exactly one value is returned.
  if kind == "table" then
    return (self:parse_csl_rich_text(text))
  end
  if kind == "string" then
    return (self:parse_html_tags(text, context, is_external))
  end
  if kind == "number" then
    return {PlainText:new(tostring(text))}
  end
  util.error("Invalid text type")
  return nil
end
---Convert CSL rich text into a list of inline elements.
---
---A plain string yields a single `PlainText`. A list is processed item by
---item: strings become `PlainText`, and objects such as
---`{quote = "quoted string."}` become the element that matches their key:
---  * bold, italic, sc, strike, sub, sup -> `Formatted`
---  * code -> `Code`, math-ml -> `MathML`, math-tex -> `MathTeX`
---  * preserve -> `NoCase`, quote -> `Quoted`
---List items with an unrecognized key or type produce no element.
---Invalid input is reported through `util.error`.
---@param text string | (string | table)[]
---@return InlineElement[]
function InlineElement:parse_csl_rich_text(text)
  -- Example: [
  --   "A title with a",
  --   {
  --     "quote": "quoted string."
  --   }
  -- ]
  local inlines = {}
  local text_type = type(text)
  if text_type == "string" then
    table.insert(inlines, PlainText:new(text))
  elseif text_type == "table" then
    for _, subtext in ipairs(text) do
      local subtext_type = type(subtext)
      local inline
      if subtext_type == "string" then
        inline = PlainText:new(subtext)
      elseif subtext_type == "table" then
        -- A formatting object is expected to hold exactly one key/value pair.
        local format
        local content
        for format_, content_ in pairs(subtext) do
          format = format_
          content = content_
        end
        if format == "code" or format == "math-ml" or format == "math-tex" then
          -- These formats carry raw string content, never nested rich text.
          if type(content) ~= "string" then
            util.error("Invalid rich text content.")
          end
          if format == "code" then
            inline = Code:new(content)
          elseif format == "math-ml" then
            inline = MathML:new(content)
          else
            inline = MathTeX:new(content)
          end
        elseif format == "bold" then
          inline = Formatted:new(self:parse_csl_rich_text(content), {["font-weight"] = "bold"})
        elseif format == "italic" then
          inline = Formatted:new(self:parse_csl_rich_text(content), {["font-style"] = "italic"})
        elseif format == "preserve" then
          inline = NoCase:new(self:parse_csl_rich_text(content))
        elseif format == "quote" then
          inline = Quoted:new(self:parse_csl_rich_text(content))
        elseif format == "sc" then
          inline = Formatted:new(self:parse_csl_rich_text(content), {["font-variant"] = "small-caps"})
        elseif format == "strike" then
          inline = Formatted:new(self:parse_csl_rich_text(content), {["strike-through"] = true})
        elseif format == "sub" then
          -- Subscript is a vertical alignment, not a font variant.
          inline = Formatted:new(self:parse_csl_rich_text(content), {["vertical-align"] = "sub"})
        elseif format == "sup" then
          -- Superscript is a vertical alignment, not a font variant.
          inline = Formatted:new(self:parse_csl_rich_text(content), {["vertical-align"] = "sup"})
        end
      end
      -- Nothing is appended for items that did not map to an element.
      if inline then
        table.insert(inlines, inline)
      end
    end
  else
    util.error("Invalid text type")
  end
  return inlines
end
-- Short local aliases for the lpeg constructors used by the patterns below.
local P, Ct, Cp = lpeg.P, lpeg.Ct, lpeg.Cp
-- Lua's regex doesn't support groups and thus we have to implement the same
-- logic with `lpeg`.
local code_pattern =
Ct(Cp() * P("") * Cp()) * ((1 - P("
")) ^ 0) *
Ct(Cp() * P("") * Cp())
+ Ct(Cp() * P("")) ^ 0) *
Ct(Cp() * P("") * Cp())
+ Ct(Cp() * P("
") * Cp()) * ((1 - P("")) ^ 0) * Ct(Cp() * P("") * Cp()) + Ct(Cp() * P("")) ^ 0) * Ct(Cp() * P("") * Cp()) + Ct(Cp() * P("
'] = {
closer = "
",
quotes = false,
},
['",
quotes = false,
},
[''] = { closer = "", quotes = false, }, ['", quotes = false, }, ['
" or tag == "