diff --git a/R/parse.R b/R/parse.R index 6972404..f16bd4e 100644 --- a/R/parse.R +++ b/R/parse.R @@ -104,15 +104,13 @@ #' way. The `id` column refers the tokens back to the original input text, the #' `block` relates tokens together into blocks. Block elements increment the #' block count when they are entered, and decrement it when they are excited. -#' Note the output doesn't specify when a block or span is excited. Spans cannot -#' have children (they are closed when a new span starts), so they are excited -#' right away. Blocks are excited when a new tag with the same indentation is -#' encountered. The `type` column provides the type of the block. The -#' `indentation` column provides the node level in the tree. A child block will -#' increase the indentation for as long as it is active. `ol_index` provides the -#' number associated with the ordered list element. `tight` indicates whether -#' the list is tight (i.e. it was provided with no empty lines between -#' list elements). +#' The `type` column provides the type of the block. The `indentation` column +#' provides the node level in the tree. A child block will increase the +#' indentation for as long as it is active. `ol_index` provides the number +#' associated with the ordered list element. `tight` indicates whether the list +#' is tight (i.e. it was provided with no empty lines between list elements). +#' The `ends` column indicates until which row in the output the tag is active +#' (i.e. the tag is closed after the row indicated by the value in this column). #' #' @export #' diff --git a/man/marquee_parse.Rd b/man/marquee_parse.Rd index 38c8a90..0370eba 100644 --- a/man/marquee_parse.Rd +++ b/man/marquee_parse.Rd @@ -113,15 +113,13 @@ some additional information useful for rendering the output in the expected way. The \code{id} column refers the tokens back to the original input text, the \code{block} relates tokens together into blocks. 
Block elements increment the block count when they are entered, and decrement it when they are excited. -Note the output doesn't specify when a block or span is excited. Spans cannot -have children (they are closed when a new span starts), so they are excited -right away. Blocks are excited when a new tag with the same indentation is -encountered. The \code{type} column provides the type of the block. The -\code{indentation} column provides the node level in the tree. A child block will -increase the indentation for as long as it is active. \code{ol_index} provides the -number associated with the ordered list element. \code{tight} indicates whether -the list is tight (i.e. it was provided with no empty lines between -list elements). +The \code{type} column provides the type of the block. The \code{indentation} column +provides the node level in the tree. A child block will increase the +indentation for as long as it is active. \code{ol_index} provides the number +associated with the ordered list element. \code{tight} indicates whether the list +is tight (i.e. it was provided with no empty lines between list elements). +The \code{ends} column indicates until which row in the output the tag is active +(i.e. the tag is closed after the row indicated by the value in this column). 
} \examples{ diff --git a/src/marquee.cpp b/src/marquee.cpp index 8ad977d..29d3d21 100644 --- a/src/marquee.cpp +++ b/src/marquee.cpp @@ -22,6 +22,7 @@ using namespace std::string_literals; struct MARQUEE_DATA { std::stack style_stack; std::stack type_stack; + std::vector index_stack; std::stack offset_stack; std::stack tight_stack; cpp11::list_of defined_styles; @@ -33,6 +34,7 @@ struct MARQUEE_DATA { cpp11::writable::integers indent; cpp11::writable::integers ol_index; cpp11::writable::logicals tight; + cpp11::writable::integers until; R_xlen_t current_id; unsigned current_block; unsigned current_indent; @@ -41,6 +43,7 @@ struct MARQUEE_DATA { MARQUEE_DATA(cpp11::list_of styles) : style_stack(), type_stack({""}), + index_stack(), offset_stack({0}), tight_stack({false}), defined_styles(styles), @@ -52,6 +55,7 @@ struct MARQUEE_DATA { indent(), ol_index(), tight(), + until(), current_id(0), current_block(0), current_indent(0), @@ -151,6 +155,7 @@ inline cpp11::writable::list combine_styles(cpp11::list parent, cpp11::list def) inline void push_info(MARQUEE_DATA* userdata, std::string type, bool block = false, bool tight = false, int offset = 1) { userdata->type_stack.push(type); + userdata->index_stack.push_back(userdata->until.size()); cpp11::list style(userdata->defined_styles[type]); if (userdata->style_stack.empty()) { userdata->style_stack.push(style); @@ -179,6 +184,7 @@ inline void init_text(MARQUEE_DATA* userdata) { userdata->indent.push_back(userdata->current_indent); userdata->ol_index.push_back(userdata->offset_stack.top()); userdata->tight.push_back(userdata->tight_stack.top()); + userdata->until.push_back(userdata->until.size() + 1); } inline void pop_info(MARQUEE_DATA* userdata, std::string type, bool block = false) { @@ -186,6 +192,11 @@ inline void pop_info(MARQUEE_DATA* userdata, std::string type, bool block = fals userdata->style_stack.pop(); } userdata->type_stack.pop(); + size_t cur_line = userdata->until.size(); + for (size_t i = 0; i < 
userdata->index_stack.size(); ++i) { + userdata->until[userdata->index_stack[i]] = cur_line; + } + userdata->index_stack.pop_back(); if (block) { userdata->current_indent--; if (type != "li") { @@ -364,9 +375,10 @@ cpp11::writable::list marquee_c(cpp11::strings text, cpp11::list_of userdata.type, userdata.indent, userdata.ol_index, - userdata.tight + userdata.tight, + userdata.until }; - cpp11::writable::strings res_names = {"text", "id", "block", "type", "indentation", "ol_index", "tight"}; + cpp11::writable::strings res_names = {"text", "id", "block", "type", "indentation", "ol_index", "tight", "ends"}; cpp11::list doc_style(userdata.style[0]); double rem_size = REAL(doc_style[0])[0];