unicodesymbols: add general way to require a feature only for specific encodings

A feature can now be required only for specific input or font encodings: - <feature>=enc1;enc2... Require the feature <feature> only if the character is used in one of the specified font or input encodings. - <feature>!=enc1;enc2... Require the feature <feature> only if the character is used in a font or input encoding that is not among those specified.
2024-12-22 05:16:21 +00:00 · 2018-04-28 13:31:29 +02:00 · 2018-04-28 13:31:29 +02:00 · 0b2fae66e3
commit 0b2fae66e3
parent 5b160e82be
5 changed files with 85 additions and 48 deletions
--- a/lib/unicodesymbols
+++ b/lib/unicodesymbols
@ -25,30 +25,36 @@

 # syntax:
 # ucs4 textcommand                textpreamble flags mathcommand mathpreamble
-# textcommand and textpreamble are used if the symbol occurs in textmode.
-# mathcommand and mathpreamble are used if the symbol occurs in mathmode.
-# Both mathcommand and mathpreamble are optional.
-# textpreamble and mathpreamble can either be a feature known by the LaTeXFeatures
-# class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}).
-# Features may be combined using '|', in this case one of the alternatives is
-# chosen. The algorithm tries to satisfy as many requirements as possible.
-# Therefore it may depend on the whole document contents which feature is chosen.
-# Known flags:
-# - combining          This is a combining char that will get combined with a base char
-# - force              Always output replacement command
-# - force=enc1;enc2... Always output replacement command in the specified encodings.
-# - force!=en1;en2...  Always output replacement command in all but the specified encodings.
-#                      Symbols are never forced in encodings with iconv name
-#                      UTF-8 and package none (currently only utf8-plain).
-# - mathalpha          This character is considered as a math variable in mathmode
-# - notermination=text Do not terminate this textcommand (by {} or space).
-#                      This is set by default if textcommand ends with }.
-# - notermination=math Do not terminate this mathcommand (by {} or space).
-#                      This is set by default if mathcommand ends with }.
-# - notermination=both Do not terminate this textcommand and mathcommand (by {} or space).
-# - notermination=none Always terminate this textcommand and mathcommand (by {} or space).
-# - tipashortcut=<shortcut> Shortcut notation for TIPA
-# - deprecated         Do not use this symbol for backwards conversion in LyX and tex2lyx.
+#
+# * textcommand and textpreamble are used if the symbol occurs in textmode.
+# * mathcommand and mathpreamble are used if the symbol occurs in mathmode.
+# * Both mathcommand and mathpreamble are optional.
+# * textpreamble and mathpreamble can either be a feature known by the LaTeXFeatures
+#   class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}).
+# * Features may be combined using '|', in this case one of the alternatives is
+#   chosen. The algorithm tries to satisfy as many requirements as possible.
+#   Therefore it may depend on the whole document contents which feature is chosen.
+# * A feature can be required only for specific input encodings or font encodings:
+#   - <feature>=enc1;enc2...  Require the feature <feature> only if the character is used in
+#                             one of the specified font or input encodings.
+#   - <feature>!=enc1;enc2... Require the feature <feature> only if the character is used in
+#                             a font or input encoding that is not among those specified.
+# * Known flags:
+#   - combining               This is a combining char that will get combined with a base char
+#   - force                   Always output replacement command
+#   - force=enc1;enc2...      Always output replacement command in the specified encodings.
+#   - force!=enc1;enc2...     Always output replacement command in all but the specified encodings.
+#                             Symbols are never forced in encodings with iconv name
+#                             UTF-8 and package none (currently only utf8-plain).
+#   - mathalpha               This character is considered as a math variable in mathmode
+#   - notermination=text      Do not terminate this textcommand (by {} or space).
+#                             This is set by default if textcommand ends with }.
+#   - notermination=math      Do not terminate this mathcommand (by {} or space).
+#                             This is set by default if mathcommand ends with }.
+#   - notermination=both      Do not terminate this textcommand and mathcommand (by {} or space).
+#   - notermination=none      Always terminate this textcommand and mathcommand (by {} or space).
+#   - tipashortcut=<shortcut> Shortcut notation for TIPA
+#   - deprecated              Do not use this symbol for backwards conversion in LyX and tex2lyx.

 #
 # 2 Latin-1 Supplement
@ -186,8 +192,8 @@
 0x011f "\\u{g}"                   "" "mathalpha" "\\breve{g}" # LATIN SMALL LETTER G WITH BREVE
 0x0120 "\\.{G}"                   "" "mathalpha" "\\dot{G}" # LATIN CAPITAL LETTER G WITH DOT ABOVE
 0x0121 "\\.{g}"                   "" "mathalpha" "\\dot{g}" # LATIN SMALL LETTER G WITH DOT ABOVE
-0x0122 "\\c{G}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA (actually a comma accent, Latvian)
-0x0123 "\\c{g}"                   "textbaltic" "mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian)
+0x0122 "\\c{G}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA (actually a comma accent, Latvian)
+0x0123 "\\c{g}"                   "textbaltic!=L7x" "mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian)
 0x0124 "\\^{H}"                   "" "mathalpha" "\\hat{H}" # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
 0x0125 "\\^{h}"                   "" "mathalpha" "\\hat{h}" # LATIN SMALL LETTER H WITH CIRCUMFLEX
 #0x0126 ""                         "" "" "" "" # LATIN CAPITAL LETTER H WITH STROKE
@ -206,13 +212,13 @@
 0x0133 "ij"                       "" "mathalpha,force=utf8x,notermination=both" "ij" "" # LATIN SMALL LIGATURE IJ
 0x0134 "\\^{J}"                   "" "mathalpha" "\\hat{J}" # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
 0x0135 "\\^{\\j}"                 "" "mathalpha,force=utf8" "\\hat{\\jmath}" "" # LATIN SMALL LETTER J WITH CIRCUMFLEX
-0x0136 "\\c{K}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA  (actually a comma accent, Latvian)
-0x0137 "\\c{k}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA  (actually a comma accent, Latvian)
+0x0136 "\\c{K}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA  (actually a comma accent, Latvian)
+0x0137 "\\c{k}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA  (actually a comma accent, Latvian)
 #0x0138 ""                         "" "" "" "" # LATIN SMALL LETTER KRA
 0x0139 "\\'{L}"                   "" "mathalpha" "\\acute{L}" # LATIN CAPITAL LETTER L WITH ACUTE
 0x013a "\\'{l}"                   "" "mathalpha" "\\acute{l}" # LATIN SMALL LETTER L WITH ACUTE
-0x013b "\\c{L}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
-0x013c "\\c{l}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
+0x013b "\\c{L}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
+0x013c "\\c{l}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually a comma accent, Latvian)
 0x013d "\\v{L}"                   "" "mathalpha" "L\\mkern-7mu\\mathchar19" # LATIN CAPITAL LETTER L WITH CARON
 0x013e "\\v{l}"                   "" "mathalpha" "l\\mkern-5mu\\mathchar19" # LATIN SMALL LETTER L WITH CARON
 #0x013f "L\\textperiodcentered"    "" "" "" "" # LATIN CAPITAL LETTER L WITH MIDDLE DOT
@ -221,8 +227,8 @@
 0x0142 "\\l"                      "" "mathalpha,notermination=math" "\\mathchar'40\\mkern-5mu l" # LATIN SMALL LETTER L WITH STROKE
 0x0143 "\\'{N}"                   "" "mathalpha" "\\acute{N}" # LATIN CAPITAL LETTER N WITH ACUTE
 0x0144 "\\'{n}"                   "" "mathalpha" "\\acute{n}" # LATIN SMALL LETTER N WITH ACUTE
-0x0145 "\\c{N}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
-0x0146 "\\c{n}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
+0x0145 "\\c{N}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
+0x0146 "\\c{n}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually a comma accent, Latvian)
 0x0147 "\\v{N}"                   "" "mathalpha" "\\check{N}" # LATIN CAPITAL LETTER N WITH CARON
 0x0148 "\\v{n}"                   "" "mathalpha" "\\check{n}" # LATIN SMALL LETTER N WITH CARON
 0x0149 "'n"                       "" "force=utf8;utf8x,deprecated" "" "" # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
@ -238,8 +244,8 @@
 0x0153 "\\oe"                     "" "" "" "" # LATIN SMALL LIGATURE OE
 0x0154 "\\'{R}"                   "" "mathalpha" "\\acute{R}" # LATIN CAPITAL LETTER R WITH ACUTE
 0x0155 "\\'{r}"                   "" "mathalpha" "\\acute{r}" # LATIN SMALL LETTER R WITH ACUTE
-0x0156 "\\c{R}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
-0x0157 "\\c{r}"                   "textbaltic" "mathalpha,force=utf8" "\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
+0x0156 "\\c{R}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
+0x0157 "\\c{r}"                   "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually a comma accent, Latvian)
 0x0158 "\\v{R}"                   "" "mathalpha" "\\check{R}" # LATIN CAPITAL LETTER R WITH CARON
 0x0159 "\\v{r}"                   "" "mathalpha" "\\check{r}" # LATIN SMALL LETTER R WITH CARON
 0x015a "\\'{S}"                   "" "mathalpha" "\\acute{S}" # LATIN CAPITAL LETTER S WITH ACUTE
--- a/src/BufferEncodings.cpp
+++ b/src/BufferEncodings.cpp
@ -93,7 +93,10 @@ void BufferEncodings::validate(char_type c, LaTeXFeatures & features, bool for_m
 					while (!feats.empty()) {
 						string feat;
 						feats = split(feats, feat, ',');
-						features.require(feat);
+						// context-dependent features are handled
+						// in Paragraph::Private::validate()
+						if (!contains(feat, '='))
+							features.require(feat);
 					}
 				} else
 					features.addPreambleSnippet(from_utf8(textpreamble));
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@ -589,11 +589,10 @@ bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
 		return false;

 	if (it->second.textpreamble() != "textgreek"
-	    && it->second.textpreamble() != "textcyrillic"
-	    && it->second.textpreamble() != "textbaltic")
+	    && it->second.textpreamble() != "textcyrillic")
 		return false;

-	if (preamble.empty() && it->second.textpreamble() != "textbaltic") {
+	if (preamble.empty()) {
 		preamble = it->second.textpreamble();
 		return true;
 	}
@ -609,8 +608,6 @@ bool Encodings::needsScriptWrapper(string const & script, string const & fontenc
 		return (fontenc != "T2A" && fontenc != "T2B"
 			&& fontenc != "T2C" && fontenc != "X2");
 	}
-	if (script == "textbaltic")
-		return (fontenc != "L7x");
 	return false;
 }

--- a/src/LaTeXFeatures.cpp
+++ b/src/LaTeXFeatures.cpp
@ -1388,19 +1388,19 @@ TexString LaTeXFeatures::getMacros() const

 	// non-standard text accents:
 	if (mustProvide("textcommaabove") || mustProvide("textcommaaboveright") ||
-	    mustProvide("textcommabelow") || mustProvide("textbalticdefs"))
+	    mustProvide("textcommabelow") || mustProvide("textbaltic"))
 		macros << lyxaccent_def;

-	if (mustProvide("textcommabelow") || mustProvide("textbalticdefs"))
+	if (mustProvide("textcommabelow") || mustProvide("textbaltic"))
 		macros << textcommabelow_def << '\n';

-	if (mustProvide("textcommaabove") || mustProvide("textbalticdefs"))
+	if (mustProvide("textcommaabove") || mustProvide("textbaltic"))
 		macros << textcommaabove_def << '\n';

 	if (mustProvide("textcommaaboveright"))
 		macros << textcommaaboveright_def << '\n';

-	if (mustProvide("textbalticdefs"))
+	if (mustProvide("textbaltic"))
 		macros << textbaltic_def << '\n';

 	// split-level fractions
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@ -1553,21 +1553,52 @@ void Paragraph::Private::validate(LaTeXFeatures & features) const
 	// then the contents
 	BufferParams const bp = features.runparams().is_child
 		? features.buffer().masterParams() : features.buffer().params();
-	string bscript = "textbaltic";
 	for (pos_type i = 0; i < int(text_.size()) ; ++i) {
 		char_type c = text_[i];
+		CharInfo const & ci = Encodings::unicodeCharInfo(c);
 		if (c == 0x0022) {
 			if (features.runparams().isFullUnicode() && bp.useNonTeXFonts)
 				features.require("textquotedblp");
 			else if (bp.main_font_encoding() != "T1"
 				 || ((&owner_->getFontSettings(bp, i))->language()->internalFontEncoding()))
 				features.require("textquotedbl");
-		} else if (Encodings::isKnownScriptChar(c, bscript)){
+		} else if (ci.textfeature() && contains(ci.textpreamble(), '=')) {
+			// features that depend on the font or input encoding
+			string feats = ci.textpreamble();
 			string fontenc = (&owner_->getFontSettings(bp, i))->language()->fontenc(bp);
 			if (fontenc.empty())
 				fontenc = features.runparams().main_fontenc;
-			if (Encodings::needsScriptWrapper("textbaltic", fontenc))
-				features.require("textbalticdefs");
+			while (!feats.empty()) {
+				string feat;
+				feats = split(feats, feat, ',');
+				if (contains(feat, "!=")) {
+					// a feature that is required except for the specified
+					// font or input encodings
+					string realfeature;
+					string const contexts = ltrim(split(feat, realfeature, '!'), "=");
+					// multiple encodings are separated by semicolon
+					vector<string> context = getVectorFromString(contexts, ";");
+					// require feature if the context matches neither current font
+					// nor input encoding
+					if (std::find(context.begin(), context.end(), fontenc) == context.end()
+					    && std::find(context.begin(), context.end(),
+							 features.runparams().encoding->name()) == context.end())
+						features.require(realfeature);
+				} else if (contains(feat, '=')) {
+					// a feature that is required only for the specified
+					// font or input encodings
+					string realfeature;
+					string const contexts = split(feat, realfeature, '=');
+					// multiple encodings are separated by semicolon
+					vector<string> context = getVectorFromString(contexts, ";");
+					// require feature if the context matches either current font
+					// or input encoding
+					if (std::find(context.begin(), context.end(), fontenc) != context.end()
+					    || std::find(context.begin(), context.end(),
+							 features.runparams().encoding->name()) != context.end())
+						features.require(realfeature);
+				}
+			}
 		} else if (!bp.use_dash_ligatures
 			   && (c == 0x2013 || c == 0x2014)
 			   && bp.useNonTeXFonts