unicodesymbols: add general way to require a feature only for specific encodings

A feature can now be required only for specific input or font encodings:
- <feature>=enc1;enc2...  Require the feature <feature> only if the
                          character is used in one of the specified font
                          or input encodings.
- <feature>!=enc1;enc2... Require the feature <feature> only if the
                          character is used in a font or input encoding
                          that is not among the specified.
This commit is contained in:
Juergen Spitzmueller 2018-04-28 13:31:29 +02:00
parent 5b160e82be
commit 0b2fae66e3
5 changed files with 85 additions and 48 deletions

View File

@ -25,30 +25,36 @@
# syntax:
# ucs4 textcommand textpreamble flags mathcommand mathpreamble
# textcommand and textpreamble are used if the symbol occurs in textmode.
# mathcommand and mathpreamble are used if the symbol occurs in mathmode.
# Both mathcommand and mathpreamble are optional.
# textpreamble and mathpreamble can either be a feature known by the LaTeXFeatures
# class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}).
# Features may be combined using '|', in this case one of the alternatives is
# chosen. The algorithm tries to satisfy as many requirements as possible.
# Therefore it may depend on the whole document contents which feature is chosen.
# Known flags:
# - combining This is a combining char that will get combined with a base char
# - force Always output replacement command
# - force=enc1;enc2... Always output replacement command in the specified encodings.
# - force!=en1;en2... Always output replacement command in all but the specified encodings.
# Symbols are never forced in encodings with iconv name
# UTF-8 and package none (currently only utf8-plain).
# - mathalpha This character is considered as a math variable in mathmode
# - notermination=text Do not terminate this textcommand (by {} or space).
# This is set by default if textcommand ends with }.
# - notermination=math Do not terminate this mathcommand (by {} or space).
# This is set by default if mathcommand ends with }.
# - notermination=both Do not terminate this textcommand and mathcommand (by {} or space).
# - notermination=none Always terminate this textcommand and mathcommand (by {} or space).
# - tipashortcut=<shortcut> Shortcut notation for TIPA
# - deprecated Do not use this symbol for backwards conversion in LyX and tex2lyx.
#
# * textcommand and textpreamble are used if the symbol occurs in textmode.
# * mathcommand and mathpreamble are used if the symbol occurs in mathmode.
# * Both mathcommand and mathpreamble are optional.
# * textpreamble and mathpreamble can either be a feature known by the LaTeXFeatures
# class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}).
# * Features may be combined using '|', in this case one of the alternatives is
# chosen. The algorithm tries to satisfy as many requirements as possible.
# Therefore it may depend on the whole document contents which feature is chosen.
# * A feature can be required only for specific input encodings or font encodings:
# - <feature>=enc1;enc2... Require the feature <feature> only if the character is used in
# one of the specified font or input encodings.
# - <feature>!=enc1;enc2... Require the feature <feature> only if the character is used in
# a font or input encoding that is not among the specified.
# * Known flags:
# - combining This is a combining char that will get combined with a base char
# - force Always output replacement command
# - force=enc1;enc2... Always output replacement command in the specified encodings.
# - force!=enc1;enc2... Always output replacement command in all but the specified encodings.
# Symbols are never forced in encodings with iconv name
# UTF-8 and package none (currently only utf8-plain).
# - mathalpha This character is considered as a math variable in mathmode
# - notermination=text Do not terminate this textcommand (by {} or space).
# This is set by default if textcommand ends with }.
# - notermination=math Do not terminate this mathcommand (by {} or space).
# This is set by default if mathcommand ends with }.
# - notermination=both Do not terminate this textcommand and mathcommand (by {} or space).
# - notermination=none Always terminate this textcommand and mathcommand (by {} or space).
# - tipashortcut=<shortcut> Shortcut notation for TIPA
# - deprecated Do not use this symbol for backwards conversion in LyX and tex2lyx.
#
# 2 Latin-1 Supplement
@ -186,8 +192,8 @@
0x011f "\\u{g}" "" "mathalpha" "\\breve{g}" # LATIN SMALL LETTER G WITH BREVE
0x0120 "\\.{G}" "" "mathalpha" "\\dot{G}" # LATIN CAPITAL LETTER G WITH DOT ABOVE
0x0121 "\\.{g}" "" "mathalpha" "\\dot{g}" # LATIN SMALL LETTER G WITH DOT ABOVE
0x0122 "\\c{G}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA (actually a comma accent, Latvian)
0x0123 "\\c{g}" "textbaltic" "mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian)
0x0122 "\\c{G}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA (actually a comma accent, Latvian)
0x0123 "\\c{g}" "textbaltic!=L7x" "mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian)
0x0124 "\\^{H}" "" "mathalpha" "\\hat{H}" # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0x0125 "\\^{h}" "" "mathalpha" "\\hat{h}" # LATIN SMALL LETTER H WITH CIRCUMFLEX
#0x0126 "" "" "" "" "" # LATIN CAPITAL LETTER H WITH STROKE
@ -206,13 +212,13 @@
0x0133 "ij" "" "mathalpha,force=utf8x,notermination=both" "ij" "" # LATIN SMALL LIGATURE IJ
0x0134 "\\^{J}" "" "mathalpha" "\\hat{J}" # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0x0135 "\\^{\\j}" "" "mathalpha,force=utf8" "\\hat{\\jmath}" "" # LATIN SMALL LETTER J WITH CIRCUMFLEX
0x0136 "\\c{K}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA (actually a comma accent, Latvian)
0x0137 "\\c{k}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA (actually a comma accent, Latvian)
0x0136 "\\c{K}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA (actually a comma accent, Latvian)
0x0137 "\\c{k}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA (actually a comma accent, Latvian)
#0x0138 "" "" "" "" "" # LATIN SMALL LETTER KRA
0x0139 "\\'{L}" "" "mathalpha" "\\acute{L}" # LATIN CAPITAL LETTER L WITH ACUTE
0x013a "\\'{l}" "" "mathalpha" "\\acute{l}" # LATIN SMALL LETTER L WITH ACUTE
0x013b "\\c{L}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
0x013c "\\c{l}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
0x013b "\\c{L}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
0x013c "\\c{l}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
0x013d "\\v{L}" "" "mathalpha" "L\\mkern-7mu\\mathchar19" # LATIN CAPITAL LETTER L WITH CARON
0x013e "\\v{l}" "" "mathalpha" "l\\mkern-5mu\\mathchar19" # LATIN SMALL LETTER L WITH CARON
#0x013f "L\\textperiodcentered" "" "" "" "" # LATIN CAPITAL LETTER L WITH MIDDLE DOT
@ -221,8 +227,8 @@
0x0142 "\\l" "" "mathalpha,notermination=math" "\\mathchar'40\\mkern-5mu l" # LATIN SMALL LETTER L WITH STROKE
0x0143 "\\'{N}" "" "mathalpha" "\\acute{N}" # LATIN CAPITAL LETTER N WITH ACUTE
0x0144 "\\'{n}" "" "mathalpha" "\\acute{n}" # LATIN SMALL LETTER N WITH ACUTE
0x0145 "\\c{N}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
0x0146 "\\c{n}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
0x0145 "\\c{N}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
0x0146 "\\c{n}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
0x0147 "\\v{N}" "" "mathalpha" "\\check{N}" # LATIN CAPITAL LETTER N WITH CARON
0x0148 "\\v{n}" "" "mathalpha" "\\check{n}" # LATIN SMALL LETTER N WITH CARON
0x0149 "'n" "" "force=utf8;utf8x,deprecated" "" "" # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
@ -238,8 +244,8 @@
0x0153 "\\oe" "" "" "" "" # LATIN SMALL LIGATURE OE
0x0154 "\\'{R}" "" "mathalpha" "\\acute{R}" # LATIN CAPITAL LETTER R WITH ACUTE
0x0155 "\\'{r}" "" "mathalpha" "\\acute{r}" # LATIN SMALL LETTER R WITH ACUTE
0x0156 "\\c{R}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
0x0157 "\\c{r}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
0x0156 "\\c{R}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
0x0157 "\\c{r}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
0x0158 "\\v{R}" "" "mathalpha" "\\check{R}" # LATIN CAPITAL LETTER R WITH CARON
0x0159 "\\v{r}" "" "mathalpha" "\\check{r}" # LATIN SMALL LETTER R WITH CARON
0x015a "\\'{S}" "" "mathalpha" "\\acute{S}" # LATIN CAPITAL LETTER S WITH ACUTE

View File

@ -93,7 +93,10 @@ void BufferEncodings::validate(char_type c, LaTeXFeatures & features, bool for_m
while (!feats.empty()) {
string feat;
feats = split(feats, feat, ',');
features.require(feat);
// context-dependent features are handled
// in Paragraph::Private::validate()
if (!contains(feat, '='))
features.require(feat);
}
} else
features.addPreambleSnippet(from_utf8(textpreamble));

View File

@ -589,11 +589,10 @@ bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
return false;
if (it->second.textpreamble() != "textgreek"
&& it->second.textpreamble() != "textcyrillic"
&& it->second.textpreamble() != "textbaltic")
&& it->second.textpreamble() != "textcyrillic")
return false;
if (preamble.empty() && it->second.textpreamble() != "textbaltic") {
if (preamble.empty()) {
preamble = it->second.textpreamble();
return true;
}
@ -609,8 +608,6 @@ bool Encodings::needsScriptWrapper(string const & script, string const & fontenc
return (fontenc != "T2A" && fontenc != "T2B"
&& fontenc != "T2C" && fontenc != "X2");
}
if (script == "textbaltic")
return (fontenc != "L7x");
return false;
}

View File

@ -1388,19 +1388,19 @@ TexString LaTeXFeatures::getMacros() const
// non-standard text accents:
if (mustProvide("textcommaabove") || mustProvide("textcommaaboveright") ||
mustProvide("textcommabelow") || mustProvide("textbalticdefs"))
mustProvide("textcommabelow") || mustProvide("textbaltic"))
macros << lyxaccent_def;
if (mustProvide("textcommabelow") || mustProvide("textbalticdefs"))
if (mustProvide("textcommabelow") || mustProvide("textbaltic"))
macros << textcommabelow_def << '\n';
if (mustProvide("textcommaabove") || mustProvide("textbalticdefs"))
if (mustProvide("textcommaabove") || mustProvide("textbaltic"))
macros << textcommaabove_def << '\n';
if (mustProvide("textcommaaboveright"))
macros << textcommaaboveright_def << '\n';
if (mustProvide("textbalticdefs"))
if (mustProvide("textbaltic"))
macros << textbaltic_def << '\n';
// split-level fractions

View File

@ -1553,21 +1553,52 @@ void Paragraph::Private::validate(LaTeXFeatures & features) const
// then the contents
BufferParams const bp = features.runparams().is_child
? features.buffer().masterParams() : features.buffer().params();
string bscript = "textbaltic";
for (pos_type i = 0; i < int(text_.size()) ; ++i) {
char_type c = text_[i];
CharInfo const & ci = Encodings::unicodeCharInfo(c);
if (c == 0x0022) {
if (features.runparams().isFullUnicode() && bp.useNonTeXFonts)
features.require("textquotedblp");
else if (bp.main_font_encoding() != "T1"
|| ((&owner_->getFontSettings(bp, i))->language()->internalFontEncoding()))
features.require("textquotedbl");
} else if (Encodings::isKnownScriptChar(c, bscript)){
} else if (ci.textfeature() && contains(ci.textpreamble(), '=')) {
// features that depend on the font or input encoding
string feats = ci.textpreamble();
string fontenc = (&owner_->getFontSettings(bp, i))->language()->fontenc(bp);
if (fontenc.empty())
fontenc = features.runparams().main_fontenc;
if (Encodings::needsScriptWrapper("textbaltic", fontenc))
features.require("textbalticdefs");
while (!feats.empty()) {
string feat;
feats = split(feats, feat, ',');
if (contains(feat, "!=")) {
// a feature that is required except for the specified
// font or input encodings
string realfeature;
string const contexts = ltrim(split(feat, realfeature, '!'), "=");
// multiple encodings are separated by semicolon
vector<string> context = getVectorFromString(contexts, ";");
// require feature if the context matches neither current font
// nor input encoding
if (std::find(context.begin(), context.end(), fontenc) == context.end()
&& std::find(context.begin(), context.end(),
features.runparams().encoding->name()) == context.end())
features.require(realfeature);
} else if (contains(feat, '=')) {
// a feature that is required only for the specified
// font or input encodings
string realfeature;
string const contexts = split(feat, realfeature, '=');
// multiple encodings are separated by semicolon
vector<string> context = getVectorFromString(contexts, ";");
// require feature if the context matches either current font
// or input encoding
if (std::find(context.begin(), context.end(), fontenc) != context.end()
|| std::find(context.begin(), context.end(),
features.runparams().encoding->name()) != context.end())
features.require(realfeature);
}
}
} else if (!bp.use_dash_ligatures
&& (c == 0x2013 || c == 0x2014)
&& bp.useNonTeXFonts