From a4600978234a442dc7b35315850aed25e6a1d138 Mon Sep 17 00:00:00 2001 From: Thibaut Cuvelier Date: Mon, 14 Feb 2022 03:03:22 +0100 Subject: [PATCH] DocBook: use the existing unicodesymbols global table to map LaTeX ERTs to Unicode characters. --- autotests/export/docbook/ert_convert.lyx | 172 +++++++++++ autotests/export/docbook/ert_convert.xml | 7 + src/insets/InsetERT.cpp | 347 ++--------------------- 3 files changed, 207 insertions(+), 319 deletions(-) create mode 100644 autotests/export/docbook/ert_convert.lyx create mode 100644 autotests/export/docbook/ert_convert.xml diff --git a/autotests/export/docbook/ert_convert.lyx b/autotests/export/docbook/ert_convert.lyx new file mode 100644 index 0000000000..1ccee0c64e --- /dev/null +++ b/autotests/export/docbook/ert_convert.lyx @@ -0,0 +1,172 @@ +#LyX 2.4 created this file. For more info see https://www.lyx.org/ +\lyxformat 609 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass article +\use_default_options true +\maintain_unincluded_children no +\language american +\language_package default +\inputencoding utf8 +\fontencoding auto +\font_roman "default" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_roman_osf false +\font_sans_osf false +\font_typewriter_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\use_microtype false +\use_dash_ligatures true +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command default +\index_command default +\float_placement class +\float_alignment class +\paperfontsize default +\use_hyperref false +\papersize default +\use_geometry false +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\use_minted 0 +\use_lineno 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tablestyle default +\tracking_changes false +\output_changes false +\change_bars false +\postpone_fragile_content true +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\docbook_table_output 0 +\docbook_mathml_prefix 1 +\end_header + +\begin_body + +\begin_layout Title +ERT Conversions +\end_layout + +\begin_layout Standard +These should be +\family typewriter +À +\family default +: +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +`A +\end_layout + +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +`{A} +\end_layout + +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +` A +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +This one should be +\family typewriter +¡ +\family default +: +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +textexclamdown +\end_layout + +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + +!` +\end_layout + +\end_inset + + +\end_layout + +\end_body +\end_document diff --git a/autotests/export/docbook/ert_convert.xml b/autotests/export/docbook/ert_convert.xml new file mode 100644 index 0000000000..e21274b485 --- /dev/null +++ b/autotests/export/docbook/ert_convert.xml @@ -0,0 +1,7 @@ + + +
+ERT Conversions +These should be À: À À À +
\ No newline at end of file diff --git a/src/insets/InsetERT.cpp b/src/insets/InsetERT.cpp index 50b5daa363..00f497f031 100644 --- a/src/insets/InsetERT.cpp +++ b/src/insets/InsetERT.cpp @@ -28,6 +28,7 @@ #include "support/gettext.h" #include "support/lstrings.h" #include "support/TempFile.h" +#include "Encoding.h" #include #include @@ -88,311 +89,6 @@ int InsetERT::plaintext(odocstringstream & os, } -static const std::map raw_latex_encoding_to_unicode_xml{ - // Punctuation. - {from_ascii("\\textquotesingle"), from_ascii("'")}, - {from_ascii("!`"), from_ascii("¡")}, // inverted exclamation mark - {from_ascii("?`"), from_ascii("¿")}, // inverted interrogation mark - - // Logos. - {from_ascii("\\LaTeX"), from_ascii("LaTeX")}, - {from_ascii("\\LaTeXe"), from_ascii("LaTeX2ε")}, // LaTeX 2 epsilon - {from_ascii("\\LyX"), from_ascii("LyX")}, - {from_ascii("\\TeX"), from_ascii("TeX")}, - - // Accentuated letters (alphabetical order of description, ligatures after accents). - // Missing letters are only indicated for vowels (only few consonants have diacritics). - // Only symbols that can be expressed as one Unicode characters are present; symbols with more than one - // diacritic are also omitted. - {from_ascii("\\`{a}"), from_ascii("á")}, // a acute - {from_ascii("\\` a"), from_ascii("á")}, - {from_ascii("\\`{A}"), from_ascii("Á")}, // A acute - {from_ascii("\\` A"), from_ascii("Á")}, - {from_ascii("\\u{a}"), from_ascii("ă")}, // a breve - {from_ascii("\\u a"), from_ascii("ă")}, - {from_ascii("\\u{A}"), from_ascii("Ă")}, // A breve - {from_ascii("\\u A"), from_ascii("Ă")}, - {from_ascii("\\v{a}"), from_ascii("ǎ")}, // a caron - {from_ascii("\\v a"), from_ascii("ǎ")}, - {from_ascii("\\v{A}"), from_ascii("Ǎ")}, // A caron - {from_ascii("\\v A"), from_ascii("Ǎ")}, - // No a cedilla. - {from_ascii("\\^{a}"), from_ascii("â")}, // a circumflex - {from_ascii("\\^ a"), from_ascii("â")}, - {from_ascii("\\^{A}"), from_ascii("Â")}, // A circumflex - {from_ascii("\\^ A"), from_ascii("Â")}, - {from_ascii("\\\"{a}"), from_ascii("ä")}, // a diaeresis - {from_ascii("\\\" a"), from_ascii("ä")}, - {from_ascii("\\\"{A}"), from_ascii("Ä")}, // A diaeresis - {from_ascii("\\\" A"), from_ascii("Ä")}, - // No a double acute. - {from_ascii("\\`{a}"), from_ascii("à")}, // a grave - {from_ascii("\\` a"), from_ascii("à")}, - {from_ascii("\\`{A}"), from_ascii("À")}, // A grave - {from_ascii("\\` A"), from_ascii("À")}, - {from_ascii("\\~{a}"), from_ascii("ã")}, // a tilde - {from_ascii("\\~ a"), from_ascii("ã")}, - {from_ascii("\\~{A}"), from_ascii("Ã")}, // A tilde - {from_ascii("\\~ A"), from_ascii("Ã")}, - {from_ascii("\\aa"), from_ascii("å")}, // a ring - {from_ascii("\\r{a}"), from_ascii("å")}, - {from_ascii("\\r a"), from_ascii("å")}, - {from_ascii("\\AA"), from_ascii("Å")}, // A ring - {from_ascii("\\r{A}"), from_ascii("Å")}, - {from_ascii("\\r A"), from_ascii("Å")}, - {from_ascii("\\ae"), from_ascii("æ")}, // ae ligature - {from_ascii("\\AE"), from_ascii("Æ")}, // AE ligature - {from_ascii("\\v{c}"), from_ascii("č")}, // c caron - {from_ascii("\\v c"), from_ascii("č")}, - {from_ascii("\\v{C}"), from_ascii("Č")}, // C caron - {from_ascii("\\v C"), from_ascii("Č")}, - {from_ascii("\\c{c}"), from_ascii("Ç")}, // c cedilla - {from_ascii("\\c c"), from_ascii("Ç")}, - {from_ascii("\\c{C}"), from_ascii("ç")}, // C cedilla - {from_ascii("\\c C"), from_ascii("ç")}, - {from_ascii("\\v{d}"), from_ascii("ď")}, // d caron - {from_ascii("\\v d"), from_ascii("ď")}, - {from_ascii("\\v{D}"), from_ascii("Ď")}, // D caron - {from_ascii("\\v D"), from_ascii("Ď")}, - {from_ascii("\\`{e}"), from_ascii("é")}, // e acute - {from_ascii("\\` e"), from_ascii("é")}, - {from_ascii("\\`{E}"), from_ascii("É")}, // E acute - {from_ascii("\\` E"), from_ascii("É")}, - {from_ascii("\\u{e}"), from_ascii("ĕ")}, // e breve - {from_ascii("\\u e"), from_ascii("ĕ")}, - {from_ascii("\\u{E}"), from_ascii("Ĕ")}, // E breve - {from_ascii("\\u E"), from_ascii("Ĕ")}, - {from_ascii("\\v{e}"), from_ascii("ě")}, // e caron - {from_ascii("\\v e"), from_ascii("ě")}, - {from_ascii("\\v{E}"), from_ascii("Ě")}, // E caron - {from_ascii("\\v E"), from_ascii("Ě")}, - {from_ascii("\\c{e}"), from_ascii("ȩ")}, // e cedilla - {from_ascii("\\c e"), from_ascii("ȩ")}, - {from_ascii("\\c{E}"), from_ascii("Ȩ")}, // E cedilla - {from_ascii("\\c E"), from_ascii("Ȩ")}, - {from_ascii("\\^{e}"), from_ascii("ê")}, // e circumflex - {from_ascii("\\^ e"), from_ascii("ê")}, - {from_ascii("\\^{E}"), from_ascii("Ê")}, // E circumflex - {from_ascii("\\^ E"), from_ascii("Ê")}, - {from_ascii("\\\"{e}"), from_ascii("ë")}, // e diaeresis - {from_ascii("\\\" e"), from_ascii("ë")}, - {from_ascii("\\\"{E}"), from_ascii("Ë")}, // E diaeresis - {from_ascii("\\\" E"), from_ascii("Ë")}, - // No e double acute. - {from_ascii("\\`{e}"), from_ascii("è")}, // e grave - {from_ascii("\\` e"), from_ascii("è")}, - {from_ascii("\\`{E}"), from_ascii("È")}, // E grave - {from_ascii("\\` E"), from_ascii("È")}, - {from_ascii("\\~{e}"), from_ascii("ẽ")}, // e tilde - {from_ascii("\\~ e"), from_ascii("ẽ")}, - {from_ascii("\\~{E}"), from_ascii("Ẽ")}, // E tilde - {from_ascii("\\~ E"), from_ascii("Ẽ")}, - // No e ring. - {from_ascii("\\u{g}"), from_ascii("ğ")}, // g breve - {from_ascii("\\u g"), from_ascii("ğ")}, - {from_ascii("\\u{G}"), from_ascii("Ğ")}, // G breve - {from_ascii("\\u G"), from_ascii("Ğ")}, - {from_ascii("\\v{g}"), from_ascii("ǧ")}, // g caron - {from_ascii("\\v g"), from_ascii("ǧ")}, - {from_ascii("\\v{G}"), from_ascii("Ǧ")}, // G caron - {from_ascii("\\v G"), from_ascii("Ǧ")}, - {from_ascii("\\c{g}"), from_ascii("ģ")}, // g cedilla - {from_ascii("\\c g"), from_ascii("ģ")}, - {from_ascii("\\c{G}"), from_ascii("Ģ")}, // G cedilla - {from_ascii("\\c G"), from_ascii("Ģ")}, - {from_ascii("\\i"), from_ascii("ı")}, // i dotless - {from_ascii("\\`{i}"), from_ascii("í")}, // i acute - {from_ascii("\\` i"), from_ascii("í")}, - {from_ascii("\\`{I}"), from_ascii("Í")}, // I acute - {from_ascii("\\` I"), from_ascii("Í")}, - {from_ascii("\\u{i}"), from_ascii("ĭ")}, // i breve - {from_ascii("\\u i"), from_ascii("ĭ")}, - {from_ascii("\\u{I}"), from_ascii("Ĭ")}, // I breve - {from_ascii("\\u I"), from_ascii("Ĭ")}, - {from_ascii("\\v{i}"), from_ascii("ǐ")}, // i caron - {from_ascii("\\v i"), from_ascii("ǐ")}, - {from_ascii("\\v{I}"), from_ascii("Ǐ")}, // I caron - {from_ascii("\\v I"), from_ascii("Ǐ")}, - // No i cedilla. - {from_ascii("\\^{i}"), from_ascii("î")}, // i circumflex - {from_ascii("\\^ i"), from_ascii("î")}, - {from_ascii("\\^{I}"), from_ascii("Î")}, // I circumflex - {from_ascii("\\^ I"), from_ascii("ï")}, - {from_ascii("\\\"{i}"), from_ascii("ï")}, // i diaeresis - {from_ascii("\\\" i"), from_ascii("ë")}, - {from_ascii("\\\"{I}"), from_ascii("ë")}, // I diaeresis - {from_ascii("\\\" I"), from_ascii("Ï")}, - // No i double acute. - {from_ascii("\\`{i}"), from_ascii("ì")}, // i grave - {from_ascii("\\` i"), from_ascii("ì")}, - {from_ascii("\\`{I}"), from_ascii("Ì")}, // I grave - {from_ascii("\\` I"), from_ascii("Ì")}, - {from_ascii("\\~{i}"), from_ascii("ĩ")}, // i tilde - {from_ascii("\\~ i"), from_ascii("ĩ")}, - {from_ascii("\\~{I}"), from_ascii("Ĩ")}, // I tilde - {from_ascii("\\~ I"), from_ascii("Ĩ")}, - // No i ring. - {from_ascii("\\j"), from_ascii("ȷ")}, // j dotless - {from_ascii("\\v{k}"), from_ascii("Ǩ")}, // k caron - {from_ascii("\\v k"), from_ascii("Ǩ")}, - {from_ascii("\\v{K}"), from_ascii("ǩ")}, // K caron - {from_ascii("\\v K"), from_ascii("ǩ")}, - {from_ascii("\\c{k}"), from_ascii("ķ")}, // k cedilla - {from_ascii("\\c k"), from_ascii("ķ")}, - {from_ascii("\\c{K}"), from_ascii("Ķ")}, // K cedilla - {from_ascii("\\c K"), from_ascii("Ķ")}, - {from_ascii("\\v{l}"), from_ascii("ľ")}, // l caron - {from_ascii("\\v l"), from_ascii("ľ")}, - {from_ascii("\\v{L}"), from_ascii("Ľ")}, // L caron - {from_ascii("\\v L"), from_ascii("Ľ")}, - {from_ascii("\\c{l}"), from_ascii("ļ")}, // l cedilla - {from_ascii("\\c l"), from_ascii("ļ")}, - {from_ascii("\\c{L}"), from_ascii("Ļ")}, // L cedilla - {from_ascii("\\c L"), from_ascii("Ļ")}, - {from_ascii("\\l"), from_ascii("ł")}, // l stroke - {from_ascii("\\L"), from_ascii("Ł")}, // L stroke - {from_ascii("\\v{n}"), from_ascii("ň")}, // n caron - {from_ascii("\\v n"), from_ascii("ň")}, - {from_ascii("\\v{N}"), from_ascii("Ň")}, // N caron - {from_ascii("\\v N"), from_ascii("Ň")}, - {from_ascii("\\c{n}"), from_ascii("ņ")}, // n cedilla - {from_ascii("\\c n"), from_ascii("ņ")}, - {from_ascii("\\c{N}"), from_ascii("Ņ")}, // N cedilla - {from_ascii("\\c N"), from_ascii("Ņ")}, - {from_ascii("\\~{n}"), from_ascii("ñ")}, // n tilde - {from_ascii("\\~ n"), from_ascii("ñ")}, - {from_ascii("\\~{N}"), from_ascii("Ñ")}, // N tilde - {from_ascii("\\~ N"), from_ascii("Ñ")}, - {from_ascii("\\`{o}"), from_ascii("ó")}, // o acute - {from_ascii("\\` o"), from_ascii("ó")}, - {from_ascii("\\`{O}"), from_ascii("Ó")}, // O acute - {from_ascii("\\` O"), from_ascii("Ó")}, - {from_ascii("\\u{o}"), from_ascii("ŏ")}, // o breve - {from_ascii("\\u o"), from_ascii("ŏ")}, - {from_ascii("\\u{O}"), from_ascii("Ŏ")}, // O breve - {from_ascii("\\u O"), from_ascii("Ŏ")}, - {from_ascii("\\v{o}"), from_ascii("ǒ")}, // o caron - {from_ascii("\\v o"), from_ascii("ǒ")}, - {from_ascii("\\v{O}"), from_ascii("Ǒ")}, // O caron - {from_ascii("\\v O"), from_ascii("Ǒ")}, - // No o cedilla. - {from_ascii("\\^{o}"), from_ascii("ô")}, // o circumflex - {from_ascii("\\^ o"), from_ascii("ô")}, - {from_ascii("\\^{O}"), from_ascii("Ô")}, // O circumflex - {from_ascii("\\^ O"), from_ascii("Ô")}, - {from_ascii("\\\"{o}"), from_ascii("ö")}, // o diaeresis - {from_ascii("\\\" o"), from_ascii("ö")}, - {from_ascii("\\\"{O}"), from_ascii("Ö")}, // O diaeresis - {from_ascii("\\\" O"), from_ascii("Ö")}, - {from_ascii("\\H{o}"), from_ascii("ő")}, // o double acute - {from_ascii("\\H o"), from_ascii("ő")}, - {from_ascii("\\H{O}"), from_ascii("Ő")}, // O double acute - {from_ascii("\\H O"), from_ascii("Ő")}, - {from_ascii("\\`{o}"), from_ascii("ò")}, // o grave - {from_ascii("\\` o"), from_ascii("ò")}, - {from_ascii("\\`{O}"), from_ascii("Ò")}, // O grave - {from_ascii("\\` O"), from_ascii("Ò")}, - {from_ascii("\\o"), from_ascii("ø")}, // o stroke - {from_ascii("\\O"), from_ascii("Ø")}, // O stroke - {from_ascii("\\~{o}"), from_ascii("õ")}, // o tilde - {from_ascii("\\~ o"), from_ascii("õ")}, - {from_ascii("\\~{O}"), from_ascii("Õ")}, // O tilde - {from_ascii("\\~ O"), from_ascii("Õ")}, - // No o ring. - {from_ascii("\\oe"), from_ascii("œ")}, // oe ligature - {from_ascii("\\OE"), from_ascii("Œ")}, // OE ligature - {from_ascii("\\v{r}"), from_ascii("ř")}, // r caron - {from_ascii("\\v r"), from_ascii("ř")}, - {from_ascii("\\v{R}"), from_ascii("Ř")}, // R caron - {from_ascii("\\v R"), from_ascii("Ř")}, - {from_ascii("\\c{r}"), from_ascii("ŗ")}, // r cedilla - {from_ascii("\\c r"), from_ascii("ŗ")}, - {from_ascii("\\c{R}"), from_ascii("Ŗ")}, // R cedilla - {from_ascii("\\c R"), from_ascii("Ŗ")}, - {from_ascii("\\v{s}"), from_ascii("š")}, // s caron - {from_ascii("\\v s"), from_ascii("š")}, - {from_ascii("\\v{S}"), from_ascii("Š")}, // S caron - {from_ascii("\\v S"), from_ascii("Š")}, - {from_ascii("\\c{s}"), from_ascii("ş")}, // s cedilla - {from_ascii("\\c s"), from_ascii("ş")}, - {from_ascii("\\c{S}"), from_ascii("Ş")}, // S cedilla - {from_ascii("\\c S"), from_ascii("Ş")}, - {from_ascii("\\v{t}"), from_ascii("ť")}, // t caron - {from_ascii("\\v t"), from_ascii("ť")}, - {from_ascii("\\v{T}"), from_ascii("Ť")}, // T caron - {from_ascii("\\v T"), from_ascii("Ť")}, - {from_ascii("\\c{t}"), from_ascii("ţ")}, // t cedilla - {from_ascii("\\c t"), from_ascii("ţ")}, - {from_ascii("\\c{T}"), from_ascii("Ţ")}, // T cedilla - {from_ascii("\\c T"), from_ascii("Ţ")}, - {from_ascii("\\`{u}"), from_ascii("ú")}, // u acute - {from_ascii("\\` u"), from_ascii("ú")}, - {from_ascii("\\`{U}"), from_ascii("Ú")}, // U acute - {from_ascii("\\` U"), from_ascii("Ú")}, - {from_ascii("\\u{u}"), from_ascii("ŭ")}, // u breve - {from_ascii("\\u u"), from_ascii("ŭ")}, - {from_ascii("\\u{U}"), from_ascii("Ŭ")}, // U breve - {from_ascii("\\u U"), from_ascii("Ŭ")}, - {from_ascii("\\v{u}"), from_ascii("ǔ")}, // u caron - {from_ascii("\\v u"), from_ascii("ǔ")}, - {from_ascii("\\v{U}"), from_ascii("Ǔ")}, // U caron - {from_ascii("\\v U"), from_ascii("Ǔ")}, - // No u cedilla. - {from_ascii("\\^{u}"), from_ascii("û")}, // u circumflex - {from_ascii("\\^ u"), from_ascii("û")}, - {from_ascii("\\^{U}"), from_ascii("Û")}, // U circumflex - {from_ascii("\\^ U"), from_ascii("Û")}, - {from_ascii("\\\"{u}"), from_ascii("ü")}, // u diaeresis - {from_ascii("\\\" u"), from_ascii("ü")}, - {from_ascii("\\\"{U}"), from_ascii("Ü")}, // U diaeresis - {from_ascii("\\\" U"), from_ascii("Ü")}, - {from_ascii("\\H{u}"), from_ascii("ű")}, // u double acute - {from_ascii("\\H u"), from_ascii("ű")}, - {from_ascii("\\H{U}"), from_ascii("Ű")}, // U double acute - {from_ascii("\\H U"), from_ascii("Ű")}, - {from_ascii("\\`{u}"), from_ascii("ù")}, // u grave - {from_ascii("\\` u"), from_ascii("ù")}, - {from_ascii("\\`{U}"), from_ascii("Ù")}, // U grave - {from_ascii("\\` U"), from_ascii("Ù")}, - {from_ascii("\\~{u}"), from_ascii("ũ")}, // u tilde - {from_ascii("\\~ u"), from_ascii("ũ")}, - {from_ascii("\\~{U}"), from_ascii("Ũ")}, // U tilde - {from_ascii("\\~ U"), from_ascii("Ũ")}, - {from_ascii("\\r{u}"), from_ascii("å")}, // u ring - {from_ascii("\\r u"), from_ascii("ů")}, - {from_ascii("\\r{U}"), from_ascii("ů")}, // U ring - {from_ascii("\\r U"), from_ascii("Ů")}, - {from_ascii("\\`{y}"), from_ascii("ý")}, // y acute - {from_ascii("\\` y"), from_ascii("ý")}, - {from_ascii("\\`{Y}"), from_ascii("Ý")}, // Y acute - {from_ascii("\\` Y"), from_ascii("Ý")}, - {from_ascii("\\v{z}"), from_ascii("ž")}, // z caron - {from_ascii("\\v z"), from_ascii("ž")}, - {from_ascii("\\v{Z}"), from_ascii("Ž")}, // Z caron - {from_ascii("\\v Z"), from_ascii("Ž")}, - // No y breve. - // No y cedilla. - {from_ascii("\\^{y}"), from_ascii("ŷ")}, // y circumflex - {from_ascii("\\^ y"), from_ascii("ŷ")}, - {from_ascii("\\^{Y}"), from_ascii("Ŷ")}, // Y circumflex - {from_ascii("\\^ Y"), from_ascii("Ŷ")}, - {from_ascii("\\\"{y}"), from_ascii("ÿ")}, // y diaeresis - {from_ascii("\\\" y"), from_ascii("ÿ")}, - {from_ascii("\\\"{Y}"), from_ascii("Ÿ")}, // Y diaeresis - {from_ascii("\\\" Y"), from_ascii("Ÿ")}, - // No y double acute. - {from_ascii("\\`{y}"), from_ascii("ỳ")}, // y grave - {from_ascii("\\` y"), from_ascii("ỳ")}, - {from_ascii("\\`{Y}"), from_ascii("Ỳ")}, // Y grave - {from_ascii("\\` Y"), from_ascii("Ỳ")}, - {from_ascii("\\~{y}"), from_ascii("ỹ")}, // y tilde - {from_ascii("\\~ y"), from_ascii("ỹ")}, - {from_ascii("\\~{Y}"), from_ascii("Ỹ")}, // Y tilde - {from_ascii("\\~ Y"), from_ascii("Ỹ")}, - // No y ring. -}; - - void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const { auto const begin = paragraphs().begin(); @@ -441,26 +137,39 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const // First step: some commands have a direct mapping to DocBook, mostly because the mapping is simply text or // an XML entity. + // Logic is similar to that of convertLaTeXCommands in BiblioInfo.cpp. + // TODO: make the code even more similar by looping over the string and applying all conversions. (What is not + // recognised should simply be put in comments: have a list of elements that are either already recognised or are + // not yet recognised? Global transformations like \string should then come first.) { docstring os_trimmed = trim(os.str()); - auto command_raw_translation = raw_latex_encoding_to_unicode_xml.find(os_trimmed); - if (command_raw_translation != raw_latex_encoding_to_unicode_xml.end()) { - xs << command_raw_translation->second; - output_as_comment = false; - } else { - // If the trimmed ERT ends with {}, try a mapping without it. - auto os_braces = os_trimmed.find(from_ascii("{}")); + // Rewrite \"u to \"{u}. + static regex const regNoBraces(R"(^\\\W\w)"); + if (regex_search(to_utf8(os_trimmed), regNoBraces)) { + os_trimmed.insert(3, from_ascii("}")); + os_trimmed.insert(2, from_ascii("{")); + } - if (os_braces != lyx::docstring::npos) { - auto key = os_trimmed.substr(0, os_braces); - auto command_braces_translation = raw_latex_encoding_to_unicode_xml.find(key); + // Rewrite \" u to \"{u}. + static regex const regSpace(R"(^\\\W \w)"); + if (regex_search(to_utf8(os_trimmed), regSpace)) { + os_trimmed[2] = '{'; + os_trimmed.insert(4, from_ascii("}")); + } - if (command_braces_translation != raw_latex_encoding_to_unicode_xml.end()) { - xs << command_braces_translation->second; - output_as_comment = false; - } + // Look into the global table of Unicode characters if there is a match. + bool termination; + docstring rem; + docstring const converted = Encodings::fromLaTeXCommand(os_trimmed, + Encodings::TEXT_CMD, termination, rem); + if (!converted.empty()) { + // Don't output the characters directly, even if the document should be encoded in UTF-8, for editors that + // do not support all these funky characters. + for (const char_type& character : converted) { + xs << XMLStream::ESCAPE_NONE << from_ascii("&#" + std::to_string(character) + ';'); } + output_as_comment = false; } }