DocBook: ensure that <info>-related insets in the abstract are not generated in the abstract.

This helps generate more conformant DocBook files.

Also implement wrapper tags for InsetText.
This commit is contained in:
Thibaut Cuvelier 2020-11-18 01:51:05 +01:00
parent 5474c3fb4b
commit 2c6537ff66
9 changed files with 381 additions and 61 deletions

View File

@ -0,0 +1,215 @@
#LyX 2.4 created this file. For more info see https://www.lyx.org/
\lyxformat 599
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass svglobal3
\begin_preamble
\RequirePackage{fix-cm}
\smartqed % flush right qed marks, e.g. at end of proof
\end_preamble
\use_default_options true
\maintain_unincluded_children no
\language english
\language_package default
\inputencoding utf8
\fontencoding auto
\font_roman "default" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_roman_osf false
\font_sans_osf false
\font_typewriter_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype false
\use_dash_ligatures false
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command bibtex
\index_command default
\paperfontsize default
\spacing single
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 0
\use_minted 0
\use_lineno 0
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle default
\tablestyle default
\tracking_changes false
\output_changes false
\change_bars false
\postpone_fragile_content false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\docbook_table_output 0
\end_header
\begin_body
\begin_layout Title
Title
\end_layout
\begin_layout Abstract
Abstract text.
\begin_inset Flex Keywords
status open
\begin_layout Plain Layout
First keyword
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
Second keyword
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
More
\end_layout
\end_inset
\begin_inset Flex PACS
status open
\begin_layout Plain Layout
PACS code1
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
PACS code2
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
more
\end_layout
\end_inset
\begin_inset Flex Subclass
status open
\begin_layout Plain Layout
MSC code1
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
MSC code2
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
more
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Your text comes here.
Separate text sections with
\end_layout
\end_body
\end_document

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- This DocBook file was created by LyX 2.4.0dev
See http://www.lyx.org/ for more information -->
<article xml:lang="en_US" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:xi="http://www.w3.org/2001/XInclude" version="5.2">
<info>
<title>Title</title>
<subjectset role='pacs'><subject>PACS code1 <!-- \and -->
PACS code2 <!-- \and -->
more</subject></subjectset><subjectset role='mcs'><subject>MSC code1 <!-- \and -->
MSC code2 <!-- \and -->
more</subject></subjectset><keywordset><keyword>First keyword <!-- \and -->
Second keyword <!-- \and -->
More</keyword></keywordset><abstract>
<para>Abstract text. </para>
</abstract>
</info>
<para>Your text comes here. Separate text sections with</para>
</article>

View File

@ -83,6 +83,11 @@ InsetLayout Flex:Subclass
CopyStyle Flex:Keywords
LatexName subclass
LabelString "Mathematics Subject Classification"
DocBookTag subject
DocBookTagType paragraph
DocBookWrapperTag subjectset
DocBookWrapperAttr role='mcs'
DocBookInInfo always
End
InsetLayout Flex:CRSC

View File

@ -376,6 +376,9 @@ public:
/// Is the current context a table?
bool docbook_in_table = false;
/// Should the layouts that should/must go into <info> be generated?
bool docbook_generate_info = true;
/// Are we generating this material for inclusion in a TOC-like entity?
bool for_toc = false;

View File

@ -3365,7 +3365,7 @@ std::vector<docstring> Paragraph::simpleDocBookOnePar(Buffer const & buf,
// If this is an InsetNewline, generate a new paragraph. Also reset the fonts, so that tags are closed in
// this paragraph.
if (getInset(i) != nullptr && getInset(i)->lyxCode() == NEWLINE_CODE) {
if (getInset(i) && getInset(i)->lyxCode() == NEWLINE_CODE) {
if (!ignore_fonts)
xs->closeFontTags();

View File

@ -91,9 +91,10 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
IL_HTMLSTYLE,
IL_HTMLPREAMBLE,
IL_DOCBOOKTAG,
IL_DOCBOOKTAGTYPE,
IL_DOCBOOKATTR,
IL_DOCBOOKTAGTYPE,
IL_DOCBOOKSECTION,
IL_DOCBOOKININFO,
IL_DOCBOOKWRAPPERTAG,
IL_DOCBOOKWRAPPERTAGTYPE,
IL_DOCBOOKWRAPPERATTR,
@ -142,6 +143,7 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
{ "decoration", IL_DECORATION },
{ "display", IL_DISPLAY },
{ "docbookattr", IL_DOCBOOKATTR },
{ "docbookininfo", IL_DOCBOOKININFO },
{ "docbooksection", IL_DOCBOOKSECTION },
{ "docbooktag", IL_DOCBOOKTAG },
{ "docbooktagtype", IL_DOCBOOKTAGTYPE },
@ -491,11 +493,14 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
case IL_DOCBOOKTAG:
lex >> docbooktag_;
break;
case IL_DOCBOOKATTR:
lex >> docbookattr_;
break;
case IL_DOCBOOKTAGTYPE:
lex >> docbooktagtype_;
break;
case IL_DOCBOOKATTR:
lex >> docbookattr_;
case IL_DOCBOOKININFO:
lex >> docbookininfo_;
break;
case IL_DOCBOOKSECTION:
lex >> docbooksection_;
@ -638,6 +643,17 @@ docstring InsetLayout::htmlstyle() const
return retval;
}
/// Returns the DocBookInInfo setting of this inset layout, normalised to one of
/// "never", "always" or "maybe". An empty or unrecognised value is replaced by
/// "never" in the (mutable) cached member before it is returned.
std::string const & InsetLayout::docbookininfo() const
{
	// Same as Layout::docbookininfo.
	// The setting is a trilean. Only titles should be "maybe": otherwise, metadata is "always", content is "never".
	bool const recognised = docbookininfo_ == "never"
		|| docbookininfo_ == "always"
		|| docbookininfo_ == "maybe";

	// An empty string is not a recognised value either, so it also falls back to "never".
	if (!recognised)
		docbookininfo_ = "never";

	return docbookininfo_;
}
void InsetLayout::readArgument(Lexer & lex)
{
Layout::latexarg arg;

View File

@ -154,6 +154,8 @@ public:
///
std::string docbookattr() const { return docbookattr_; }
///
std::string const & docbookininfo() const;
///
bool docbooksection() const { return docbooksection_; }
///
std::string docbookwrappertag() const { return docbookwrappertag_; }
@ -295,6 +297,8 @@ private:
///
std::string docbookattr_;
///
mutable std::string docbookininfo_;
///
bool docbooksection_ = false;
///
std::string docbookwrappertag_;

View File

@ -616,16 +616,28 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op
}
InsetLayout const & il = getLayout();
if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") {
docstring attrs = docstring();
if (!il.docbookattr().empty())
attrs += from_ascii(il.docbookattr());
if (il.docbooktag() == "link")
attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\"");
xs << xml::StartTag(il.docbooktag(), attrs);
// Maybe this is an <info> paragraph that should not be generated at all (i.e. right now, its place is somewhere
// else, typically outside the current paragraph).
if (!rp.docbook_generate_info && il.docbookininfo() != "never")
return;
// Start outputting this inset.
if (opts & WriteOuterTag) {
if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE")
xs << xml::StartTag(il.docbookwrappertag(), il.docbookwrapperattr());
if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") {
docstring attrs = docstring();
if (!il.docbookattr().empty())
attrs += from_ascii(il.docbookattr());
if (il.docbooktag() == "link")
attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\"");
xs << xml::StartTag(il.docbooktag(), attrs);
}
}
// No need for labels that are generated from counters.
// No need for labels that are generated from counters. They should be handled by the external DocBook processor.
// With respect to XHTML, paragraphs are still allowed here.
if (!allowMultiPar())
@ -637,8 +649,13 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op
docbookParagraphs(text_, buffer(), xs, runparams);
xs.endDivision();
if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE")
xs << xml::EndTag(il.docbooktag());
if (opts & WriteOuterTag) {
if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE")
xs << xml::EndTag(il.docbooktag());
if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE")
xs << xml::EndTag(il.docbookwrappertag());
}
}

View File

@ -160,11 +160,10 @@ string fontToAttribute(xml::FontTypes type) {
// If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
// for the font.
string role = fontToRole(type);
if (!role.empty()) {
if (!role.empty())
return "role='" + role + "'";
} else {
else
return "";
}
}
@ -412,10 +411,6 @@ void makeParagraph(
OutputParams const & runparams,
ParagraphList::const_iterator const & par)
{
// If this kind of layout should be ignored, already leave.
if (par->layout().docbooktag() == "IGNORE")
return;
// Useful variables.
auto const begin = text.paragraphs().begin();
auto const end = text.paragraphs().end();
@ -511,7 +506,7 @@ void makeParagraph(
// or we're not in the last paragraph, anyway.
// (ii) We didn't open it and docbook_in_par is true,
// but we are in the first par, and there is a next par.
bool const close_par = open_par && (!runparams.docbook_in_par);
bool const close_par = open_par && !runparams.docbook_in_par;
// Determine if this paragraph has some real content. Things like new pages are not caught
// by Paragraph::empty(), even though they do not generate anything useful in DocBook.
@ -542,10 +537,6 @@ void makeEnvironment(Text const &text,
OutputParams const &runparams,
ParagraphList::const_iterator const & par)
{
// If this kind of layout should be ignored, already leave.
if (par->layout().docbooktag() == "IGNORE")
return;
// Useful variables.
auto const end = text.paragraphs().end();
auto nextpar = par;
@ -648,13 +639,6 @@ ParagraphList::const_iterator makeListEnvironment(Text const &text,
auto const end = text.paragraphs().end();
auto const envend = findEndOfEnvironment(par, end);
// If this kind of layout should be ignored, already leave.
if (begin->layout().docbooktag() == "IGNORE") {
auto nextpar = par;
++nextpar;
return nextpar;
}
// Output the opening tag for this environment.
Layout const & envstyle = par->layout();
openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype());
@ -741,9 +725,6 @@ void makeCommand(
OutputParams const & runparams,
ParagraphList::const_iterator const & par)
{
// If this kind of layout should be ignored, already leave.
if (par->layout().docbooktag() == "IGNORE")
return;
// Useful variables.
// Unlike XHTML, no need for labels, as they are handled by DocBook tags.
@ -909,30 +890,77 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
} // end anonymous namespace
/// Collects the text insets found in paragraph \p par (searching nested text insets
/// recursively) whose layout asks to be placed in <info>, i.e. whose docbookininfo()
/// is anything other than "never".
/// Returns an empty set when the paragraph's layout has the DocBook tag "IGNORE".
std::set<const Inset *> gatherInfo(ParagraphList::const_iterator par)
{
	// This function has a structure highly similar to makeAny and its friends. It's only made to be called on what
	// should become the document's <abstract>.
	std::set<const Inset *> found;

	// Ignored layouts contribute nothing.
	if (par->layout().docbooktag() == "IGNORE")
		return found;

	// Dive deep into the abstract, as it may hide many things that DocBook doesn't want to be inside the abstract.
	for (pos_type pos = 0; pos < par->size(); ++pos) {
		Inset const * const candidate = par->getInset(pos);
		if (!candidate)
			continue;

		InsetText const * const textInset = candidate->asInsetText();
		if (!textInset)
			continue;

		// An inset that belongs in <info> is recorded as a whole; its contents are not searched.
		if (textInset->getLayout().docbookininfo() != "never") {
			found.insert(textInset);
			continue;
		}

		// Otherwise, look for <info> material inside each of its paragraphs.
		auto sub = textInset->paragraphs().begin();
		for (; sub != textInset->paragraphs().end(); ++sub) {
			std::set<const Inset *> const nested = gatherInfo(sub);
			found.insert(nested.begin(), nested.end());
		}
	}

	return found;
}
/// Generates the DocBook output for the paragraph \p par by dispatching on its layout's
/// LaTeX type, unless the paragraph must be skipped.
///
/// A paragraph is skipped when its layout's DocBook tag is "IGNORE", or when it is
/// <info> material (docbookininfo() != "never"), is not the abstract itself, and
/// runparams.docbook_generate_info is false.
///
/// \return the next paragraph to handle. For list environments this is whatever
///         makeListEnvironment() returns (it may consume several paragraphs); in every
///         other case, including skipped paragraphs, it is the paragraph after \p par.
ParagraphList::const_iterator makeAny(Text const &text,
		Buffer const &buf,
		XMLStream &xs,
		OutputParams const &runparams,
		ParagraphList::const_iterator par)
{
	// The dispatch below must happen exactly once and only after the skip checks: an
	// additional unguarded switch before them would output each paragraph twice and
	// bypass the filtering entirely.

	// If this kind of layout should be ignored, already leave.
	bool ignoreParagraph = par->layout().docbooktag() == "IGNORE";

	// For things that should go into <info>, check the variable runparams.docbook_generate_info. This does not
	// apply to the abstract itself.
	bool const isAbstract = par->layout().docbookabstract() || par->layout().docbooktag() == "abstract";
	ignoreParagraph |= !isAbstract && par->layout().docbookininfo() != "never" && !runparams.docbook_generate_info;

	// Switch on the type of paragraph to call the right handler.
	if (!ignoreParagraph) {
		switch (par->layout().latextype) {
		case LATEX_COMMAND:
			makeCommand(text, buf, xs, runparams, par);
			break;
		case LATEX_ENVIRONMENT:
			makeEnvironment(text, buf, xs, runparams, par);
			break;
		case LATEX_LIST_ENVIRONMENT:
		case LATEX_ITEM_ENVIRONMENT:
			// Only case when makeAny() might consume more than one paragraph.
			return makeListEnvironment(text, buf, xs, runparams, par);
		case LATEX_PARAGRAPH:
			makeParagraph(text, buf, xs, runparams, par);
			break;
		case LATEX_BIB_ENVIRONMENT:
			makeBibliography(text, buf, xs, runparams, par);
			break;
		}
	}

	// For cases that are not lists, the next paragraph to handle is the next one.
	++par;
	return par;
}
@ -964,6 +992,9 @@ void outputDocBookInfo(
// This check must be performed *before* a decision on whether or not to output <info> is made.
bool hasAbstract = !info.abstract.empty();
docstring abstract;
set<const Inset *> infoInsets; // Paragraphs that should go into <info>, but are hidden in an <abstract>
// paragraph. (This happens for quite a few layouts, unfortunately.)
if (hasAbstract) {
// Generate the abstract XML into a string before further checks.
// Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might
@ -971,14 +1002,20 @@ void outputDocBookInfo(
odocstringstream os2;
XMLStream xs2(os2);
set<pit_type> doneParas;
auto rp = runparams;
rp.docbook_generate_info = false;
set<pit_type> doneParas; // Paragraphs that have already been converted (mostly to deal with lists).
for (auto const & p : info.abstract) {
if (doneParas.find(p) == doneParas.end()) {
auto oldPar = paragraphs.iterator_at(p);
auto newPar = makeAny(text, buf, xs2, runparams, oldPar);
auto newPar = makeAny(text, buf, xs2, rp, oldPar);
infoInsets.merge(gatherInfo(oldPar));
// Insert the indices of all the paragraphs that were just generated (typically, one).
// **Make the hypothesis that, when an abstract has a list, all its items are consecutive.**
// Otherwise, makeAny and makeListEnvironment would have to be adapted too.
pit_type id = p;
while (oldPar != newPar) {
doneParas.emplace(id);
@ -1009,13 +1046,11 @@ void outputDocBookInfo(
xs << xml::CR();
}
// Output the elements that should go in <info>, before and after the abstract.
// Output the elements that should go in <info>.
// - First, the title.
for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
// that mandating a wrapper like <info> would repel users. Thus, generate them first.
makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
for (auto pit : info.mustBeInInfo)
makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
// If there is no title, generate one (required for the document to be valid).
// This code is called for the main document, for table cells, etc., so be precise in this condition.
if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) {
@ -1025,8 +1060,14 @@ void outputDocBookInfo(
xs << xml::CR();
}
// Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
// it contains several paragraphs that are empty).
// - Then, other metadata.
for (auto pit : info.mustBeInInfo)
makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
for (auto const * inset : infoInsets)
inset->docbook(xs, runparams);
// - Finally, always output the abstract as the last item of the <info>, as it requires special treatment
// (especially if it contains several paragraphs that are empty).
if (hasAbstract) {
if (info.abstractLayout) {
xs << XMLStream::ESCAPE_NONE << abstract;