Skip to content

Commit

Permalink
feat(typesetters): Add content to text utility to the base typesetter
Browse files Browse the repository at this point in the history
See same commit in sile-typesetter/sile#2207
  • Loading branch information
Omikhleia authored and Didier Willis committed Jan 9, 2025
1 parent bf0d4f4 commit ba6dcdb
Showing 1 changed file with 53 additions and 0 deletions.
53 changes: 53 additions & 0 deletions silex/typesetters/base.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1605,5 +1605,58 @@ function typesetter:liner (name, content, outputYourself)
end
end

--- Flatten a node list into just its string representation.
-- Nodes flagged `is_nnode` or `is_unshaped` contribute their `toText()` value.
-- Glues and kerns that are wide enough contribute a single space. Nodes
-- flagged `is_zerohbox` or `is_migrating` are silently ignored, and anything
-- else is skipped with a warning.
-- @tparam table nodes Typeset nodes
-- @treturn string Text reconstruction of the nodes (a single value: leading
--   and trailing spaces are trimmed, and runs of whitespace are collapsed)
local function _nodesToText (nodes)
   -- A real interword space width depends on several settings (depending on variable
   -- spaces being enabled or not, etc.), and the computation below takes that into
   -- account.
   local iwspc = SILE.shaper:measureSpace(SILE.font.loadDefaults({}))
   local iwspcmin = (iwspc.length - iwspc.shrink):tonumber()
   -- Loop-invariant threshold above which a glue or kern counts as a space.
   local spaceThreshold = iwspcmin * 0.5

   -- Collect the pieces in a buffer and join them once at the end, rather than
   -- growing a string by repeated concatenation. The buffer is deliberately not
   -- called "string", so that the standard string library is not shadowed.
   local pieces = {}
   for i = 1, #nodes do
      local node = nodes[i]
      if node.is_nnode or node.is_unshaped then
         pieces[#pieces + 1] = node:toText()
      elseif node.is_glue or node.is_kern then
         -- What we want to avoid is "small" glues or kerns to be expanded as full
         -- spaces.
         -- Comparing them to half of the smallest width of a possibly shrinkable
         -- interword space is fairly fragile and empirical: the content could contain
         -- font changes, so the comparison is wrong in the general case.
         -- It's a simplistic approach. We cannot really be sure what a "space" meant
         -- at the point where the kern or glue got absolutized.
         if node.width:tonumber() > spaceThreshold then
            pieces[#pieces + 1] = " "
         end
      elseif not (node.is_zerohbox or node.is_migrating) then
         -- Here, typically, the main case is an hbox.
         -- Even if extracting its content could be possible in some regular cases
         -- we cannot take a general decision, as it is a versatile object and its
         -- outputYourself() method could moreover have been redefined to do fancy
         -- things. Better warn and skip.
         -- Explicit tostring(): concatenating a table directly only works when
         -- the node type provides a __concat metamethod.
         SU.warn("Some content could not be converted to text: " .. tostring(node))
      end
   end
   -- Trim leading and trailing spaces, and simplify internal spaces.
   -- gsub() returns both the resulting string and the number of substitutions:
   -- keep only the string, so that callers get exactly one return value.
   local text = pl.stringx.strip(table.concat(pieces)):gsub("%s%s+", " ")
   return text
end

--- Build a plain-text rendition of a SILE AST by actually typesetting it.
-- Where SU.ast.contentToString() works on the AST alone, this method runs
-- the content through SILE.process() inside a pushed typesetter state, and
-- then rebuilds the text from the nodes that were produced.
-- @tparam table content SILE AST to process
-- @treturn string Textual representation of the content
function typesetter:contentToText (content)
   -- Process the content between pushState() and popState(), with the
   -- pushed state flagged as hmodeOnly.
   self:pushState()
   self.state.hmodeOnly = true
   SILE.process(content)
   -- Read the nodes before popState(), while self.state is still the pushed one.
   local producedNodes = self.state.nodes
   local flattened = _nodesToText(producedNodes)
   self:popState()
   return flattened
end

return typesetter

0 comments on commit ba6dcdb

Please sign in to comment.