Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(math): Minimal support for accents in MathML and TeX-like commands #2187

Merged
merged 1 commit into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/math/atoms.lua
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ local atomType = {
over = 8, -- Unused for now (used for overlines etc. in The TeXbook)
under = 9, -- Unused for now (used for underlines etc. in The TeXbook)
accent = 10,
botaccent = 11, -- Unused for now but botaccent is encoded in our dictionary
botaccent = 11,
}

return { types = atomType }
105 changes: 78 additions & 27 deletions packages/math/base-elements.lua
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,18 @@ local function isNotEmpty (element)
return element and (element:is_a(elements.terminal) or #element.children > 0)
end

local function getAccentMode (mode)
   -- Mode to apply to an accent script, per MathML Core §3.4.3:
   -- the size is left unchanged, but display modes are left for the
   -- corresponding text modes. Any other mode is returned untouched.
   local accentMode = mode
   if mode == mathMode.display then
      accentMode = mathMode.text
   elseif mode == mathMode.displayCramped then
      accentMode = mathMode.textCramped
   end
   return accentMode
end

local function unwrapSingleElementMrow (elt)
-- CODE SMELL.
-- For \overset or \underset in LaTeX, MathML would use <mover> or <munder>.
Expand All @@ -748,10 +760,13 @@ local function unwrapSingleElementMrow (elt)
end
end

function elements.underOver:_init (base, sub, sup)
function elements.underOver:_init (attributes, base, sub, sup)
elements.mbox._init(self)
base = unwrapSingleElementMrow(base)
self.atom = base.atom
self.attributes = attributes or {}
self.attributes.accent = SU.boolean(self.attributes.accent, false)
self.attributes.accentunder = SU.boolean(self.attributes.accentunder, false)
self.base = base
self.sub = isNotEmpty(sub) and sub or nil
self.sup = isNotEmpty(sup) and sup or nil
Expand All @@ -771,10 +786,10 @@ function elements.underOver:styleChildren ()
self.base.mode = self.mode
end
if self.sub then
self.sub.mode = getSubscriptMode(self.mode)
self.sub.mode = self.attributes.accentunder and getAccentMode(self.mode) or getSubscriptMode(self.mode)
end
if self.sup then
self.sup.mode = getSuperscriptMode(self.mode)
self.sup.mode = self.attributes.accent and getAccentMode(self.mode) or getSuperscriptMode(self.mode)
end
end

Expand Down Expand Up @@ -816,7 +831,10 @@ function elements.underOver:_stretchyReshapeToBase (part)
end

function elements.underOver:shape ()
local constants = self:getMathMetrics().constants
local scaleDown = self:getScaleDown()
local isMovableLimits = SU.boolean(self.base and self.base.movablelimits, false)
local itCorr = self:calculateItalicsCorrection() * scaleDown
if not (self.mode == mathMode.display or self.mode == mathMode.displayCramped) and isMovableLimits then
-- When the base is a movable limit, the under/over scripts are not placed under/over the base,
-- but rather to the right of it, when display mode is not used.
Expand All @@ -827,32 +845,54 @@ function elements.underOver:shape ()
elements.subscript.shape(self)
return
end
local constants = self:getMathMetrics().constants
local scaleDown = self:getScaleDown()
-- Determine relative Ys
if self.base then
self.base.relY = SILE.types.length(0)
end
if self.sub then
self:_stretchyReshapeToBase(self.sub)
self.sub.relY = self.base.depth
+ SILE.types.length(
math.max(
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber(),
constants.lowerLimitBaselineDropMin * scaleDown
-- TODO These rules are incomplete and even wrong if we were to fully implement MathML Core.
if self.attributes.accentunder then
self.sub.relY = self.base.depth
+ SILE.types.length(
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber()
-- We assume that the accent is aligned on the base.
)
)
else
self.sub.relY = self.base.depth
+ SILE.types.length(
math.max(
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber(),
constants.lowerLimitBaselineDropMin * scaleDown
)
)
end
end
if self.sup then
self:_stretchyReshapeToBase(self.sup)
self.sup.relY = 0
- self.base.height
- SILE.types.length(
math.max(
(constants.upperLimitGapMin * scaleDown + self.sup.depth):tonumber(),
constants.upperLimitBaselineRiseMin * scaleDown
-- TODO These rules are incomplete if we were to fully implement MathML Core.
if self.attributes.accent then
self.sup.relY = 0 - self.base.height
-- MathML Core wants to align on the accentBaseHeight...
local overShift = math.max(0, constants.accentBaseHeight * scaleDown - self.base.height:tonumber())
self.sup.relY = self.sup.relY - SILE.types.length(overShift)
-- HACK: .... but improperly dimensioned accents can overshoot the base glyph.
-- So we try some guesswork to correct this.
-- Typically some non-combining symbols are in this case...
local heuristics = 0.5 * constants.flattenedAccentBaseHeight + 0.5 * constants.accentBaseHeight
if self.sup.height > SILE.types.length(heuristics * scaleDown) then
self.sup.relY = self.sup.relY + SILE.types.length(constants.accentBaseHeight * scaleDown)
end
else
self.sup.relY = 0
- self.base.height
- SILE.types.length(
math.max(
(constants.upperLimitGapMin * scaleDown + self.sup.depth):tonumber(),
constants.upperLimitBaselineRiseMin * scaleDown
)
)
)
end
end
-- Determine relative Xs based on widest symbol
local widest, a, b
Expand Down Expand Up @@ -893,7 +933,6 @@ function elements.underOver:shape ()
if b then
b.relX = c - b.width / 2
end
local itCorr = self:calculateItalicsCorrection() * scaleDown
if self.sup then
self.sup.relX = self.sup.relX + itCorr / 2
end
Expand Down Expand Up @@ -1201,7 +1240,10 @@ end
function elements.text:_vertStretchyReshape (depth, height)
local hasStretched = self:_stretchyReshape(depth + height, true)
if hasStretched then
-- HACK: see output routine
-- RESCALING HACK: see output routine
-- We only do it if the scaling logic found constructions on the vertical block axis.
-- It's a dirty hack until we properly implement assembly of glyphs in the case we couldn't
-- find a big enough variant.
self.vertExpectedSz = height + depth
self.vertScalingRatio = (depth + height):tonumber() / (self.height:tonumber() + self.depth:tonumber())
self.height = height
Expand All @@ -1212,12 +1254,21 @@ end

function elements.text:_horizStretchyReshape (width)
local hasStretched = self:_stretchyReshape(width, false)
if hasStretched then
-- HACK: see output routine
self.horizScalingRatio = width:tonumber() / self.width:tonumber()
self.width = width
end
return hasStretched
if not hasStretched and width:tonumber() < self.width:tonumber() then
-- Never shrink glyphs, it looks ugly
return false
end
-- But if stretching couldn't be done, it will be ugly anyway, so we will force
-- a re-scaling of the glyph.
-- (So it is slightly different from the vertical case, because MathML just has one stretchy
-- attribute, whether for stretching on the vertical (block) or horizontal (inline) axis,
-- and we cannot know which axis is meant unless we implement yet another mapping table
-- as the one in the MathML Core appendices. Frankly, how many non-normative appendices
-- do we need to implement MathML correctly?)
-- RESCALING HACK: see output routine
self.horizScalingRatio = width:tonumber() / self.width:tonumber()
self.width = width
return true
end

function elements.text:output (x, y, line)
Expand Down Expand Up @@ -1356,7 +1407,7 @@ local function newSubscript (spec)
end

local function newUnderOver (spec)
return elements.underOver(spec.base, spec.sub, spec.sup)
return elements.underOver(spec.attributes, spec.base, spec.sub, spec.sup)
end

-- TODO replace with penlight equivalent
Expand Down
2 changes: 1 addition & 1 deletion packages/math/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ The \code{counter} or the direct value \code{number} is passed as a parameter to

\paragraph{Missing features}
This package still lacks support for some mathematical constructs, but hopefully we’ll get there.
Among unsupported constructs are: decorating symbols with so-called accents, such as arrows or hats, “over” or “under” braces, and line breaking inside a formula.
Among unsupported features, we can mention line breaking inside a formula.

\font:remove-fallback
\end{document}
Expand Down
122 changes: 113 additions & 9 deletions packages/math/texlike.lua
Original file line number Diff line number Diff line change
Expand Up @@ -396,9 +396,24 @@ local function isOperatorKind (tree, typeOfAtom)
return false
end

local function isMoveableLimits (tree)
local function isMoveableLimitsOrAlwaysStacked (tree)
if not tree then
return false -- safeguard
end
if tree.is_always_stacked then
-- We use an internal flag to mark commands that are always stacking
-- their sup/sub arguments, such as brace-like commands.
return true
end
if tree.command ~= "mo" then
return false
-- On the recursion:
-- MathML allows movablelimits on <mo> elements, but "embellished operators"
-- can be other elements inheriting the property from their "core operator",
-- see MathML Core §3.2.4.1, which is full of intricacies so we are probably
-- not even doing the right thing here.
-- On the hack:
-- See variant commands for limits further down.
return SU.boolean(tree.is_hacked_movablelimits, false) or isMoveableLimitsOrAlwaysStacked(tree[1])
end
if tree.options and SU.boolean(tree.options.movablelimits, false) then
return true
Expand Down Expand Up @@ -430,6 +445,9 @@ end
local function isAccentSymbol (symbol)
   -- Look the symbol up once in the operator dictionary; an unknown symbol
   -- yields the (falsy) lookup result itself, a known one yields whether its
   -- atom type is "accent".
   local entry = operatorDict[symbol]
   if not entry then
      return entry
   end
   return entry.atom == atoms.types.accent
end
local function isBottomAccentSymbol (symbol)
   -- Look the symbol up once in the operator dictionary; an unknown symbol
   -- yields the (falsy) lookup result itself, a known one yields whether its
   -- atom type is "botaccent".
   local entry = operatorDict[symbol]
   if not entry then
      return entry
   end
   return entry.atom == atoms.types.botaccent
end

local function compileToMathML_aux (_, arg_env, tree)
if type(tree) == "string" then
Expand Down Expand Up @@ -565,14 +583,15 @@ local function compileToMathML_aux (_, arg_env, tree)
end
tree.options = {}
-- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
-- depending on whether the base is an operator with moveable limits.
elseif tree.id == "sup" and isMoveableLimits(tree[1]) then
-- depending on whether the base is an operator with moveable limits,
-- or a brace-like command.
elseif tree.id == "sup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "mover"
elseif tree.id == "sub" and isMoveableLimits(tree[1]) then
elseif tree.id == "sub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "munder"
elseif tree.id == "subsup" and isMoveableLimits(tree[1]) then
elseif tree.id == "subsup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "munderover"
elseif tree.id == "supsub" and isMoveableLimits(tree[1]) then
elseif tree.id == "supsub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "munderover"
local tmp = tree[2]
tree[2] = tree[3]
Expand Down Expand Up @@ -638,7 +657,7 @@ local function compileToMathML_aux (_, arg_env, tree)
elseif tree.id == "command" and symbols[tree.command] then
local atom = { id = "atom", [1] = symbols[tree.command] }
if isAccentSymbol(symbols[tree.command]) and #tree > 0 then
-- LaTeX-style accents \vec{v} = <mover accent="true"><mi>v</mi><mo></mo></mover>
-- LaTeX-style accents \overrightarrow{v} = <mover accent="true"><mi>v</mi><mo>&#x20D7;</mo></mover>
local accent = {
id = "command",
command = "mover",
Expand All @@ -649,6 +668,18 @@ local function compileToMathML_aux (_, arg_env, tree)
accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
accent[2] = compileToMathML_aux(nil, arg_env, atom)
tree = accent
elseif isBottomAccentSymbol(symbols[tree.command]) and #tree > 0 then
-- LaTeX-style bottom accents \underleftarrow{v} = <munder accentunder="true"><mi>v</mi><mo>&#x20EE;</mo></munder>
local accent = {
id = "command",
command = "munder",
options = {
accentunder = "true",
},
}
accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
accent[2] = compileToMathML_aux(nil, arg_env, atom)
tree = accent
elseif #tree > 0 then
-- Play cool with LaTeX-style commands that don't take arguments:
-- Edge case for non-accent symbols so we don't lose bracketed groups
Expand Down Expand Up @@ -728,6 +759,80 @@ registerCommand("mn", { [1] = objType.str }, function (x)
return x
end)

-- Register a limit-like variant command
-- Variants of superior, inferior, projective and injective limits are special:
-- They accept a sub/sup behaving as a movablelimits, but also have a symbol
-- on top of the limit symbol, which is not a movablelimits.
-- I can't see in the MathML specification how to do this properly: MathML Core
-- seems to only allow movablelimits on <mo> elements, and <mover>/<munder> may
-- inherit that property from their "core operator", but in this case we do not
-- want the accent to be movable, only the limit sup/sub.
-- So we use a hack, and also avoid "\def" here to prevent unwanted mrows.
-- @tparam string name TeX command name
-- @tparam string command MathML command (mover or munder)
-- @tparam number symbol Unicode codepoint for the accent symbol
-- @tparam string text Text representation
local function registerVarLimits (name, command, symbol, text)
   registerCommand(name, {}, function ()
      -- The wrapping element is flagged as an accent construct on the
      -- relevant side: accent for mover, accentunder for anything else.
      local options
      if command == "mover" then
         options = { accent = "true" }
      else
         options = { accentunder = "true" }
      end
      -- Base: the limit text as an operator atom, explicitly not movable.
      local limitOperator = {
         command = "mo",
         options = { atom = "op", movablelimits = false },
         text,
      }
      -- Decoration: the symbol stacked over/under the limit text.
      -- NOTE(review): accentunder is set here regardless of the command,
      -- mover included -- confirm this is intended.
      local accentOperator = {
         command = "mo",
         options = { accentunder = "true" },
         luautf8.char(symbol),
      }
      -- Fresh tables are built on every invocation of the command.
      return {
         command = command,
         is_hacked_movablelimits = true, -- Internal flag to mark this as a hack
         options = options,
         limitOperator,
         accentOperator,
      }
   end)
end
-- Limit-like variants, registered in order: TeX name, MathML command,
-- accent codepoint, limit text.
for _, spec in ipairs({
   { "varlimsup", "mover", 0x203E, "lim" }, -- U+203E OVERLINE
   { "varliminf", "munder", 0x203E, "lim" }, -- U+203E OVERLINE
   { "varprojlim", "munder", 0x2190, "lim" }, -- U+2190 LEFTWARDS ARROW
   { "varinjlim", "munder", 0x2192, "lim" }, -- U+2192 RIGHTWARDS ARROW
}) do
   registerVarLimits(spec[1], spec[2], spec[3], spec[4])
end

-- Register a brace-like command.
-- Those symbols are accents per-se in MathML, and are non-combining in Unicode.
-- But TeX treats them as "pseudo-accent" stretchy symbols.
-- Moreover, they accept a sub/sup which is always stacked, and not movable.
-- So we use an internal flag.
-- We also avoid "\def" here to prevent unwanted mrows resulting from the
-- compilation of the argument.
-- @tparam string name TeX command name
-- @tparam string command MathML command (mover or munder)
-- @tparam number symbol Unicode codepoint for the brace symbol
local function registerBraceLikeCommands (name, command, symbol)
   -- The command takes a single tree argument.
   local argTypes = { [1] = objType.tree }
   registerCommand(name, argTypes, function (tree)
      -- The wrapping element is flagged as an accent construct on the
      -- relevant side: accent for mover, accentunder for anything else.
      local options
      if command == "mover" then
         options = { accent = "true" }
      else
         options = { accentunder = "true" }
      end
      -- The brace itself, as a stretchy operator.
      local stretchyBrace = {
         command = "mo",
         options = { stretchy = "true" },
         luautf8.char(symbol),
      }
      -- Fresh tables are built on every invocation of the command.
      return {
         command = command,
         is_always_stacked = true, -- Internal flag to mark this as a brace-like command
         options = options,
         tree[1],
         stretchyBrace,
      }
   end)
end
-- Note: the following overrides the default commands from xml-entities / unicode-math.
-- Brace-like commands, registered in order: TeX name, MathML command,
-- brace codepoint.
for _, spec in ipairs({
   { "overbrace", "mover", 0x23DE }, -- U+23DE TOP CURLY BRACKET
   { "underbrace", "munder", 0x23DF }, -- U+23DF BOTTOM CURLY BRACKET
   { "overparen", "mover", 0x23DC }, -- U+23DC TOP PARENTHESIS
   { "underparen", "munder", 0x23DD }, -- U+23DD BOTTOM PARENTHESIS
   { "overbracket", "mover", 0x23B4 }, -- U+23B4 TOP SQUARE BRACKET
   { "underbracket", "munder", 0x23B5 }, -- U+23B5 BOTTOM SQUARE BRACKET
}) do
   registerBraceLikeCommands(spec[1], spec[2], spec[3])
end

compileToMathML(
nil,
{},
Expand All @@ -737,7 +842,6 @@ compileToMathML(
\def{sqrt}{\msqrt{#1}}
\def{bi}{\mi[mathvariant=bold-italic]{#1}}
\def{dsi}{\mi[mathvariant=double-struck]{#1}}
\def{vec}{\mover[accent=true]{#1}{\rightarrow}}

% From amsmath:
\def{to}{\mo[atom=bin]{→}}
Expand Down
Loading
Loading