DocBook: ensure that <info>-related insets found in the abstract are output in <info> rather than inside the <abstract> itself.

This helps generate more conformant DocBook files.

Also implement wrapper tags for InsetText.
This commit is contained in:
Thibaut Cuvelier 2020-11-18 01:51:05 +01:00
parent 5474c3fb4b
commit 2c6537ff66
9 changed files with 381 additions and 61 deletions

View File

@ -0,0 +1,215 @@
#LyX 2.4 created this file. For more info see https://www.lyx.org/
\lyxformat 599
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass svglobal3
\begin_preamble
\RequirePackage{fix-cm}
\smartqed % flush right qed marks, e.g. at end of proof
\end_preamble
\use_default_options true
\maintain_unincluded_children no
\language english
\language_package default
\inputencoding utf8
\fontencoding auto
\font_roman "default" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_roman_osf false
\font_sans_osf false
\font_typewriter_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype false
\use_dash_ligatures false
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command bibtex
\index_command default
\paperfontsize default
\spacing single
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\biblio_style plain
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 0
\use_minted 0
\use_lineno 0
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle default
\tablestyle default
\tracking_changes false
\output_changes false
\change_bars false
\postpone_fragile_content false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\docbook_table_output 0
\end_header
\begin_body
\begin_layout Title
Title
\end_layout
\begin_layout Abstract
Abstract text.
\begin_inset Flex Keywords
status open
\begin_layout Plain Layout
First keyword
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
Second keyword
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
More
\end_layout
\end_inset
\begin_inset Flex PACS
status open
\begin_layout Plain Layout
PACS code1
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
PACS code2
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
more
\end_layout
\end_inset
\begin_inset Flex Subclass
status open
\begin_layout Plain Layout
MSC code1
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
MSC code2
\begin_inset ERT
status collapsed
\begin_layout Plain Layout
\backslash
and
\end_layout
\end_inset
more
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Your text comes here.
Separate text sections with
\end_layout
\end_body
\end_document

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- This DocBook file was created by LyX 2.4.0dev
See http://www.lyx.org/ for more information -->
<article xml:lang="en_US" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:xi="http://www.w3.org/2001/XInclude" version="5.2">
<info>
<title>Title</title>
<subjectset role='pacs'><subject>PACS code1 <!-- \and -->
PACS code2 <!-- \and -->
more</subject></subjectset><subjectset role='mcs'><subject>MSC code1 <!-- \and -->
MSC code2 <!-- \and -->
more</subject></subjectset><keywordset><keyword>First keyword <!-- \and -->
Second keyword <!-- \and -->
More</keyword></keywordset><abstract>
<para>Abstract text. </para>
</abstract>
</info>
<para>Your text comes here. Separate text sections with</para>
</article>

View File

@ -83,6 +83,11 @@ InsetLayout Flex:Subclass
CopyStyle Flex:Keywords CopyStyle Flex:Keywords
LatexName subclass LatexName subclass
LabelString "Mathematics Subject Classification" LabelString "Mathematics Subject Classification"
DocBookTag subject
DocBookTagType paragraph
DocBookWrapperTag subjectset
DocBookWrapperAttr role='mcs'
DocBookInInfo always
End End
InsetLayout Flex:CRSC InsetLayout Flex:CRSC

View File

@ -376,6 +376,9 @@ public:
/// Is the current context a table? /// Is the current context a table?
bool docbook_in_table = false; bool docbook_in_table = false;
/// Should the layouts that should/must go into <info> be generated?
bool docbook_generate_info = true;
/// Are we generating this material for inclusion in a TOC-like entity? /// Are we generating this material for inclusion in a TOC-like entity?
bool for_toc = false; bool for_toc = false;

View File

@ -3365,7 +3365,7 @@ std::vector<docstring> Paragraph::simpleDocBookOnePar(Buffer const & buf,
// If this is an InsetNewline, generate a new paragraph. Also reset the fonts, so that tags are closed in // If this is an InsetNewline, generate a new paragraph. Also reset the fonts, so that tags are closed in
// this paragraph. // this paragraph.
if (getInset(i) != nullptr && getInset(i)->lyxCode() == NEWLINE_CODE) { if (getInset(i) && getInset(i)->lyxCode() == NEWLINE_CODE) {
if (!ignore_fonts) if (!ignore_fonts)
xs->closeFontTags(); xs->closeFontTags();

View File

@ -91,9 +91,10 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
IL_HTMLSTYLE, IL_HTMLSTYLE,
IL_HTMLPREAMBLE, IL_HTMLPREAMBLE,
IL_DOCBOOKTAG, IL_DOCBOOKTAG,
IL_DOCBOOKTAGTYPE,
IL_DOCBOOKATTR, IL_DOCBOOKATTR,
IL_DOCBOOKTAGTYPE,
IL_DOCBOOKSECTION, IL_DOCBOOKSECTION,
IL_DOCBOOKININFO,
IL_DOCBOOKWRAPPERTAG, IL_DOCBOOKWRAPPERTAG,
IL_DOCBOOKWRAPPERTAGTYPE, IL_DOCBOOKWRAPPERTAGTYPE,
IL_DOCBOOKWRAPPERATTR, IL_DOCBOOKWRAPPERATTR,
@ -142,6 +143,7 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
{ "decoration", IL_DECORATION }, { "decoration", IL_DECORATION },
{ "display", IL_DISPLAY }, { "display", IL_DISPLAY },
{ "docbookattr", IL_DOCBOOKATTR }, { "docbookattr", IL_DOCBOOKATTR },
{ "docbookininfo", IL_DOCBOOKININFO },
{ "docbooksection", IL_DOCBOOKSECTION }, { "docbooksection", IL_DOCBOOKSECTION },
{ "docbooktag", IL_DOCBOOKTAG }, { "docbooktag", IL_DOCBOOKTAG },
{ "docbooktagtype", IL_DOCBOOKTAGTYPE }, { "docbooktagtype", IL_DOCBOOKTAGTYPE },
@ -491,11 +493,14 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
case IL_DOCBOOKTAG: case IL_DOCBOOKTAG:
lex >> docbooktag_; lex >> docbooktag_;
break; break;
case IL_DOCBOOKATTR:
lex >> docbookattr_;
break;
case IL_DOCBOOKTAGTYPE: case IL_DOCBOOKTAGTYPE:
lex >> docbooktagtype_; lex >> docbooktagtype_;
break; break;
case IL_DOCBOOKATTR: case IL_DOCBOOKININFO:
lex >> docbookattr_; lex >> docbookininfo_;
break; break;
case IL_DOCBOOKSECTION: case IL_DOCBOOKSECTION:
lex >> docbooksection_; lex >> docbooksection_;
@ -638,6 +643,17 @@ docstring InsetLayout::htmlstyle() const
return retval; return retval;
} }
std::string const & InsetLayout::docbookininfo() const
{
	// Mirrors Layout::docbookininfo: the setting is a three-valued flag.
	// "maybe" is meant for titles only; otherwise metadata is "always" and
	// ordinary content is "never".
	bool const recognised = docbookininfo_ == "never"
		|| docbookininfo_ == "always"
		|| docbookininfo_ == "maybe";

	// Anything else (including an unset, empty value) is normalised to "never".
	// The member is declared mutable so that this lazy clean-up can happen
	// from a const accessor.
	if (!recognised)
		docbookininfo_ = "never";

	return docbookininfo_;
}
void InsetLayout::readArgument(Lexer & lex) void InsetLayout::readArgument(Lexer & lex)
{ {
Layout::latexarg arg; Layout::latexarg arg;

View File

@ -154,6 +154,8 @@ public:
/// ///
std::string docbookattr() const { return docbookattr_; } std::string docbookattr() const { return docbookattr_; }
/// ///
std::string const & docbookininfo() const;
///
bool docbooksection() const { return docbooksection_; } bool docbooksection() const { return docbooksection_; }
/// ///
std::string docbookwrappertag() const { return docbookwrappertag_; } std::string docbookwrappertag() const { return docbookwrappertag_; }
@ -295,6 +297,8 @@ private:
/// ///
std::string docbookattr_; std::string docbookattr_;
/// ///
mutable std::string docbookininfo_;
///
bool docbooksection_ = false; bool docbooksection_ = false;
/// ///
std::string docbookwrappertag_; std::string docbookwrappertag_;

View File

@ -616,16 +616,28 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op
} }
InsetLayout const & il = getLayout(); InsetLayout const & il = getLayout();
if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") {
docstring attrs = docstring(); // Maybe this is an <info> paragraph that should not be generated at all (i.e. right now, its place is somewhere
if (!il.docbookattr().empty()) // else, typically outside the current paragraph).
attrs += from_ascii(il.docbookattr()); if (!rp.docbook_generate_info && il.docbookininfo() != "never")
if (il.docbooktag() == "link") return;
attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\"");
xs << xml::StartTag(il.docbooktag(), attrs); // Start outputting this inset.
if (opts & WriteOuterTag) {
if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE")
xs << xml::StartTag(il.docbookwrappertag(), il.docbookwrapperattr());
if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") {
docstring attrs = docstring();
if (!il.docbookattr().empty())
attrs += from_ascii(il.docbookattr());
if (il.docbooktag() == "link")
attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\"");
xs << xml::StartTag(il.docbooktag(), attrs);
}
} }
// No need for labels that are generated from counters. // No need for labels that are generated from counters. They should be handled by the external DocBook processor.
// With respect to XHTML, paragraphs are still allowed here. // With respect to XHTML, paragraphs are still allowed here.
if (!allowMultiPar()) if (!allowMultiPar())
@ -637,8 +649,13 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op
docbookParagraphs(text_, buffer(), xs, runparams); docbookParagraphs(text_, buffer(), xs, runparams);
xs.endDivision(); xs.endDivision();
if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") if (opts & WriteOuterTag) {
xs << xml::EndTag(il.docbooktag()); if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE")
xs << xml::EndTag(il.docbooktag());
if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE")
xs << xml::EndTag(il.docbookwrappertag());
}
} }

View File

@ -160,11 +160,10 @@ string fontToAttribute(xml::FontTypes type) {
// If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
// for the font. // for the font.
string role = fontToRole(type); string role = fontToRole(type);
if (!role.empty()) { if (!role.empty())
return "role='" + role + "'"; return "role='" + role + "'";
} else { else
return ""; return "";
}
} }
@ -412,10 +411,6 @@ void makeParagraph(
OutputParams const & runparams, OutputParams const & runparams,
ParagraphList::const_iterator const & par) ParagraphList::const_iterator const & par)
{ {
// If this kind of layout should be ignored, already leave.
if (par->layout().docbooktag() == "IGNORE")
return;
// Useful variables. // Useful variables.
auto const begin = text.paragraphs().begin(); auto const begin = text.paragraphs().begin();
auto const end = text.paragraphs().end(); auto const end = text.paragraphs().end();
@ -511,7 +506,7 @@ void makeParagraph(
// or we're not in the last paragraph, anyway. // or we're not in the last paragraph, anyway.
// (ii) We didn't open it and docbook_in_par is true, // (ii) We didn't open it and docbook_in_par is true,
// but we are in the first par, and there is a next par. // but we are in the first par, and there is a next par.
bool const close_par = open_par && (!runparams.docbook_in_par); bool const close_par = open_par && !runparams.docbook_in_par;
// Determine if this paragraph has some real content. Things like new pages are not caught // Determine if this paragraph has some real content. Things like new pages are not caught
// by Paragraph::empty(), even though they do not generate anything useful in DocBook. // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
@ -542,10 +537,6 @@ void makeEnvironment(Text const &text,
OutputParams const &runparams, OutputParams const &runparams,
ParagraphList::const_iterator const & par) ParagraphList::const_iterator const & par)
{ {
// If this kind of layout should be ignored, already leave.
if (par->layout().docbooktag() == "IGNORE")
return;
// Useful variables. // Useful variables.
auto const end = text.paragraphs().end(); auto const end = text.paragraphs().end();
auto nextpar = par; auto nextpar = par;
@ -648,13 +639,6 @@ ParagraphList::const_iterator makeListEnvironment(Text const &text,
auto const end = text.paragraphs().end(); auto const end = text.paragraphs().end();
auto const envend = findEndOfEnvironment(par, end); auto const envend = findEndOfEnvironment(par, end);
// If this kind of layout should be ignored, already leave.
if (begin->layout().docbooktag() == "IGNORE") {
auto nextpar = par;
++nextpar;
return nextpar;
}
// Output the opening tag for this environment. // Output the opening tag for this environment.
Layout const & envstyle = par->layout(); Layout const & envstyle = par->layout();
openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype()); openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype());
@ -741,9 +725,6 @@ void makeCommand(
OutputParams const & runparams, OutputParams const & runparams,
ParagraphList::const_iterator const & par) ParagraphList::const_iterator const & par)
{ {
// If this kind of layout should be ignored, already leave.
if (par->layout().docbooktag() == "IGNORE")
return;
// Useful variables. // Useful variables.
// Unlike XHTML, no need for labels, as they are handled by DocBook tags. // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
@ -909,30 +890,77 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
} // end anonymous namespace } // end anonymous namespace
/// Collect the text insets found in paragraph \p par whose layout asks to be
/// placed in <info> (i.e. whose docbookininfo() is anything but "never").
///
/// The structure is close to makeAny and its helpers. It is only meant to be
/// called on paragraphs that will become the document's <abstract>.
///
/// \param par paragraph to scan.
/// \return the set of matching insets; empty when the paragraph's layout has
///         the DocBook tag "IGNORE".
std::set<const Inset *> gatherInfo(ParagraphList::const_iterator par)
{
	std::set<const Inset *> values;

	// If this kind of layout should be ignored, already leave.
	if (par->layout().docbooktag() == "IGNORE")
		return values;

	// Dive deep into the abstract, as it may hide many things that DocBook
	// doesn't want to be inside the abstract.
	for (pos_type i = 0; i < par->size(); ++i) {
		// Look the inset up once per position; only text insets are of interest.
		Inset const * const candidate = par->getInset(i);
		if (!candidate)
			continue;
		InsetText const * const inset = candidate->asInsetText();
		if (!inset)
			continue;

		// If this should go in info, mark it as such and do not descend into it.
		if (inset->getLayout().docbookininfo() != "never") {
			values.insert(inset);
			continue;
		}

		// Otherwise, look for candidates nested inside this inset's paragraphs.
		for (auto subpar = inset->paragraphs().begin();
		     subpar != inset->paragraphs().end(); ++subpar) {
			// Range insert instead of std::set::merge: merge only exists
			// since C++17, while insert gives the same union with any standard.
			std::set<const Inset *> const nested = gatherInfo(subpar);
			values.insert(nested.begin(), nested.end());
		}
	}

	return values;
}
ParagraphList::const_iterator makeAny(Text const &text, ParagraphList::const_iterator makeAny(Text const &text,
Buffer const &buf, Buffer const &buf,
XMLStream &xs, XMLStream &xs,
OutputParams const &runparams, OutputParams const &runparams,
ParagraphList::const_iterator par) ParagraphList::const_iterator par)
{ {
switch (par->layout().latextype) { bool ignoreParagraph = false;
case LATEX_COMMAND:
makeCommand(text, buf, xs, runparams, par); // If this kind of layout should be ignored, already leave.
break; ignoreParagraph |= par->layout().docbooktag() == "IGNORE";
case LATEX_ENVIRONMENT:
makeEnvironment(text, buf, xs, runparams, par); // For things that should go into <info>, check the variable rp.docbook_generate_info. This does not apply to the
break; // abstract itself.
case LATEX_LIST_ENVIRONMENT: bool isAbstract = par->layout().docbookabstract() || par->layout().docbooktag() == "abstract";
case LATEX_ITEM_ENVIRONMENT: ignoreParagraph |= !isAbstract && par->layout().docbookininfo() != "never" && !runparams.docbook_generate_info;
// Only case when makeAny() might consume more than one paragraph.
return makeListEnvironment(text, buf, xs, runparams, par); // Switch on the type of paragraph to call the right handler.
case LATEX_PARAGRAPH: if (!ignoreParagraph) {
makeParagraph(text, buf, xs, runparams, par); switch (par->layout().latextype) {
break; case LATEX_COMMAND:
case LATEX_BIB_ENVIRONMENT: makeCommand(text, buf, xs, runparams, par);
makeBibliography(text, buf, xs, runparams, par); break;
break; case LATEX_ENVIRONMENT:
makeEnvironment(text, buf, xs, runparams, par);
break;
case LATEX_LIST_ENVIRONMENT:
case LATEX_ITEM_ENVIRONMENT:
// Only case when makeAny() might consume more than one paragraph.
return makeListEnvironment(text, buf, xs, runparams, par);
case LATEX_PARAGRAPH:
makeParagraph(text, buf, xs, runparams, par);
break;
case LATEX_BIB_ENVIRONMENT:
makeBibliography(text, buf, xs, runparams, par);
break;
}
} }
// For cases that are not lists, the next paragraph to handle is the next one.
++par; ++par;
return par; return par;
} }
@ -964,6 +992,9 @@ void outputDocBookInfo(
// This check must be performed *before* a decision on whether or not to output <info> is made. // This check must be performed *before* a decision on whether or not to output <info> is made.
bool hasAbstract = !info.abstract.empty(); bool hasAbstract = !info.abstract.empty();
docstring abstract; docstring abstract;
set<const Inset *> infoInsets; // Paragraphs that should go into <info>, but are hidden in an <abstract>
// paragraph. (This happens for quite a few layouts, unfortunately.)
if (hasAbstract) { if (hasAbstract) {
// Generate the abstract XML into a string before further checks. // Generate the abstract XML into a string before further checks.
// Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might // Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might
@ -971,14 +1002,20 @@ void outputDocBookInfo(
odocstringstream os2; odocstringstream os2;
XMLStream xs2(os2); XMLStream xs2(os2);
set<pit_type> doneParas; auto rp = runparams;
rp.docbook_generate_info = false;
set<pit_type> doneParas; // Paragraphs that have already been converted (mostly to deal with lists).
for (auto const & p : info.abstract) { for (auto const & p : info.abstract) {
if (doneParas.find(p) == doneParas.end()) { if (doneParas.find(p) == doneParas.end()) {
auto oldPar = paragraphs.iterator_at(p); auto oldPar = paragraphs.iterator_at(p);
auto newPar = makeAny(text, buf, xs2, runparams, oldPar); auto newPar = makeAny(text, buf, xs2, rp, oldPar);
infoInsets.merge(gatherInfo(oldPar));
// Insert the indices of all the paragraphs that were just generated (typically, one). // Insert the indices of all the paragraphs that were just generated (typically, one).
// **Make the hypothesis that, when an abstract has a list, all its items are consecutive.** // **Make the hypothesis that, when an abstract has a list, all its items are consecutive.**
// Otherwise, makeAny and makeListEnvironment would have to be adapted too.
pit_type id = p; pit_type id = p;
while (oldPar != newPar) { while (oldPar != newPar) {
doneParas.emplace(id); doneParas.emplace(id);
@ -1009,13 +1046,11 @@ void outputDocBookInfo(
xs << xml::CR(); xs << xml::CR();
} }
// Output the elements that should go in <info>, before and after the abstract. // Output the elements that should go in <info>.
// - First, the title.
for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
// that mandating a wrapper like <info> would repel users. Thus, generate them first. // that mandating a wrapper like <info> would repel users. Thus, generate them first.
makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit)); makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
for (auto pit : info.mustBeInInfo)
makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
// If there is no title, generate one (required for the document to be valid). // If there is no title, generate one (required for the document to be valid).
// This code is called for the main document, for table cells, etc., so be precise in this condition. // This code is called for the main document, for table cells, etc., so be precise in this condition.
if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) { if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) {
@ -1025,8 +1060,14 @@ void outputDocBookInfo(
xs << xml::CR(); xs << xml::CR();
} }
// Always output the abstract as the last item of the <info>, as it requires special treatment (especially if // - Then, other metadata.
// it contains several paragraphs that are empty). for (auto pit : info.mustBeInInfo)
makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
for (auto const * inset : infoInsets)
inset->docbook(xs, runparams);
// - Finally, always output the abstract as the last item of the <info>, as it requires special treatment
// (especially if it contains several paragraphs that are empty).
if (hasAbstract) { if (hasAbstract) {
if (info.abstractLayout) { if (info.abstractLayout) {
xs << XMLStream::ESCAPE_NONE << abstract; xs << XMLStream::ESCAPE_NONE << abstract;