Fix bug #5087 as suggested by Jürgen

Now it is possible to force certain commands only for some encodings.
2024-11-22 10:00:33 +00:00 · 2013-01-12 19:25:24 +01:00 · 2013-01-12 19:25:24 +01:00 · 7c6a6a05a2
commit 7c6a6a05a2
parent 409f384e21
3 changed files with 50 additions and 23 deletions
--- a/lib/unicodesymbols
+++ b/lib/unicodesymbols
@ -36,6 +36,7 @@
 # Known flags:
 # - combining       This is a combining char that will get combined with a base char
 # - force           Always output replacement command
+# - force=enc1,enc2... Always output replacement command in the specified encodings.
 # - mathalpha       This character is considered as a math variable in mathmode
 # - notermination=text Do not terminate this textcommand (by {} or space).
 #                      This is set by default if textcommand ends with }.
@ -1725,7 +1726,7 @@
 # use the following macro for the character ADRESSED TO THE SUBJECT
 0x2101 "\\LyXaddressed" "\\newcommand*\\LyXaddressed{\\mbox{\\raisebox{.8ex}{a}\\kern-.175em\\raisebox{.2ex}{/}\\kern-.18em\\raisebox{-.2ex}{s}}}" ""
 0x2102 ""                         "" "" "\\mathbb{C}" "amssymb" # DOUBLE-STRUCK CAPITAL C
-0x2103 "\\textcelsius"            "textcomp" "" # DEGREE CELSIUS
+0x2103 "\\textcelsius"            "textcomp" "force=utf8x" "" "" "" # DEGREE CELSIUS
 #0x2104 ""                         "" "" "" "" # CENTRE LINE SYMBOL
 # use the following macro for the character CARE OF
 0x2105 "\\LyXcareof" "\\newcommand*\\LyXcareof{\\mbox{\\raisebox{.8ex}{c}\\kern-.175em\\raisebox{.2ex}{/}\\kern-.18em\\raisebox{-.2ex}{o}}}" ""
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@ -143,7 +143,7 @@ char_type arabic_table[172][4] = {
 	{0, 0, 0, 0}, // 0x067b
 	{0, 0, 0, 0}, // 0x067c
 	{0, 0, 0, 0}, // 0x067d
-	{0xfb56, 0xfb57, 0xfb58, 0xfb59}, // 0x067e = peh 
+	{0xfb56, 0xfb57, 0xfb58, 0xfb59}, // 0x067e = peh
 	{0, 0, 0, 0}, // 0x067f
 	{0, 0, 0, 0}, // 0x0680
 	{0, 0, 0, 0}, // 0x0681
@ -151,7 +151,7 @@ char_type arabic_table[172][4] = {
 	{0, 0, 0, 0}, // 0x0683
 	{0, 0, 0, 0}, // 0x0684
 	{0, 0, 0, 0}, // 0x0685
-	{0xfb7a, 0xfb7b, 0xfb7c, 0xfb7d}, // 0x0686 = tcheh 
+	{0xfb7a, 0xfb7b, 0xfb7c, 0xfb7d}, // 0x0686 = tcheh
 	{0, 0, 0, 0}, // 0x0687
 	{0, 0, 0, 0}, // 0x0688
 	{0, 0, 0, 0}, // 0x0689
@ -186,13 +186,13 @@ char_type arabic_table[172][4] = {
 	{0, 0, 0, 0}, // 0x06a6
 	{0, 0, 0, 0}, // 0x06a7
 	{0, 0, 0, 0}, // 0x06a8
-	{0xfb8e, 0xfb8f, 0xfb90, 0xfb91}, // 0x06a9 = farsi kaf 
+	{0xfb8e, 0xfb8f, 0xfb90, 0xfb91}, // 0x06a9 = farsi kaf
 	{0, 0, 0, 0}, // 0x06aa
 	{0, 0, 0, 0}, // 0x06ab
 	{0, 0, 0, 0}, // 0x06ac
 	{0, 0, 0, 0}, // 0x06ad
 	{0, 0, 0, 0}, // 0x06ae
-	{0xfb92, 0xfb93, 0xfb94, 0xfb95}, // 0x06af = gaf 
+	{0xfb92, 0xfb93, 0xfb94, 0xfb95}, // 0x06af = gaf
 	{0, 0, 0, 0}, // 0x06b0
 	{0, 0, 0, 0}, // 0x06b1
 	{0, 0, 0, 0}, // 0x06b2
@ -221,7 +221,7 @@ char_type arabic_table[172][4] = {
 	{0, 0, 0, 0}, // 0x06c9
 	{0, 0, 0, 0}, // 0x06ca
 	{0, 0, 0, 0}, // 0x06cb
-	{0xfbfc, 0xfbfd, 0xfbfe, 0xfbff} // 0x06cc = farsi yeh	
+	{0xfbfc, 0xfbfd, 0xfbfe, 0xfbff} // 0x06cc = farsi yeh
 };


@ -242,6 +242,8 @@ enum CharInfoFlags {
 	CharInfoTextNoTermination = 16,
 	///
 	CharInfoMathNoTermination = 32,
+	///
+	CharInfoForceSelected = 64,
 };

 /// Information about a single UCS4 character
@ -265,6 +267,8 @@ struct CharInfo {
 	/// Always force the LaTeX command, even if the encoding contains
 	/// this character?
 	bool force() const { return flags & CharInfoForce ? true : false; }
+	/// Force the LaTeX command for some encodings?
+	bool forceselected() const { return flags & CharInfoForceSelected ? true : false; }
 	/// TIPA shortcut
 	string tipashortcut;
 	/// \c textcommand needs no termination (such as {} or space).
@ -280,7 +284,9 @@ typedef map<char_type, CharInfo> CharInfoMap;
 CharInfoMap unicodesymbols;

 typedef std::set<char_type> CharSet;
+typedef std::map<string, CharSet> CharSetMap;
 CharSet forced;
+CharSetMap forcedselected;

 typedef std::set<char_type> MathAlphaSet;
 MathAlphaSet mathalpha;
@ -307,7 +313,7 @@ const char * EncodingException::what() const throw()
 Encoding::Encoding(string const & n, string const & l, string const & g,
 		   string const & i, bool f, bool u, Encoding::Package p)
 	: name_(n), latexName_(l), guiName_(g), iconvName_(i), fixedwidth_(f),
-	  unsafe_(u), package_(p)
+	  unsafe_(u), forced_(&forcedselected[n]), package_(p)
 {
 	if (n == "ascii") {
 		// ASCII can encode 128 code points and nothing else
@ -318,6 +324,7 @@ Encoding::Encoding(string const & n, string const & l, string const & g,
 		start_encodable_ = max_ucs4;
 		complete_ = true;
 	} else {
+		start_encodable_ = 0;
 		complete_ = false;
 	}
 }
@ -341,8 +348,12 @@ void Encoding::init() const
 				continue;
 			char_type const uc = ucs4[0];
 			CharInfoMap::const_iterator const it = unicodesymbols.find(uc);
-			if (it == unicodesymbols.end() || !it->second.force())
+			if (it == unicodesymbols.end())
 				encodable_.insert(uc);
+			else if (!it->second.force()) {
+				if (forced_->empty() || forced_->find(uc) == forced_->end())
+					encodable_.insert(uc);
+			}
 		}
 	} else {
 		// We do not know how many code points this encoding has, and
@ -353,8 +364,12 @@ void Encoding::init() const
 			vector<char> const eightbit = ucs4_to_eightbit(&c, 1, iconvName_);
 			if (!eightbit.empty()) {
 				CharInfoMap::const_iterator const it = unicodesymbols.find(c);
-				if (it == unicodesymbols.end() || !it->second.force())
+				if (it == unicodesymbols.end())
 					encodable_.insert(c);
+				else if (!it->second.force()) {
+					if (forced_->empty() || forced_->find(c) == forced_->end())
+						encodable_.insert(c);
+				}
 			}
 		}
 	}
@ -369,6 +384,14 @@ void Encoding::init() const
 }


+bool Encoding::isForced(char_type c) const
+{
+	if (!forced.empty() && forced.find(c) != forced.end())
+		return true;
+	return !forced_->empty() && forced_->find(c) != forced_->end();
+}
+
+
 bool Encoding::encodable(char_type c) const
 {
 	// assure the used encoding is properly initialized
@ -376,7 +399,7 @@ bool Encoding::encodable(char_type c) const

 	if (iconvName_ == "UTF-8" && package_ == none)
 		return true;
-	if (c < start_encodable_ && !encodings.isForced(c))
+	if (c < start_encodable_ && !isForced(c))
 		return true;
 	if (encodable_.find(c) != encodable_.end())
 		return true;
@ -812,12 +835,6 @@ bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
 }


-bool Encodings::isForced(char_type c)
-{
-	return (!forced.empty() && forced.find(c) != forced.end());
-}
-
-
 bool Encodings::isMathAlpha(char_type c)
 {
 	return mathalpha.count(c);
@ -912,6 +929,12 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
 			} else if (flag == "force") {
 				info.flags |= CharInfoForce;
 				forced.insert(symbol);
+			} else if (prefixIs(flag, "force=")) {
+				vector<string> encodings =
+					getVectorFromString(flag.substr(6));
+				for (size_t i = 0; i < encodings.size(); ++i)
+					forcedselected[encodings[i]].insert(symbol);
+				info.flags |= CharInfoForceSelected;
 			} else if (flag == "mathalpha") {
 				mathalpha.insert(symbol);
 			} else if (flag == "notermination=text") {
@ -974,7 +997,8 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
 			<< " '" << info.textfeature() << ' ' << info.textnotermination()
 			<< ' ' << to_utf8(info.mathcommand) << "' '" << info.mathpreamble
 			<< "' " << info.mathfeature() << ' ' << info.mathnotermination()
-			<< ' ' << info.combining() << ' ' << info.force());
+			<< ' ' << info.combining() << ' ' << info.force()
+			<< ' ' << info.forceselected());

 		// we assume that at least one command is nonempty when using unicodesymbols
 		if (!info.textcommand.empty() || !info.mathcommand.empty())
--- a/src/Encoding.h
+++ b/src/Encoding.h
@ -100,6 +100,12 @@ public:
 	/// A list of all characters usable in this encoding
 	std::vector<char_type> symbolsList() const;
 private:
+	/**
+	 * Do we have to output this character as LaTeX command in any case?
+	 * This is true if the "force" flag is set.
+	 * We need this if the inputencoding does not support a certain glyph.
+	 */
+	bool isForced(char_type c) const;
 	///
 	std::string name_;
 	///
@ -118,6 +124,8 @@ private:
 	/// Set of UCS4 characters that we can encode (for singlebyte
 	/// encodings only)
 	mutable CharSet encodable_;
+	/// Set of UCS4 characters that we can't encode
+	CharSet const * forced_;
 	/// All code points below this are encodable. This helps us to avoid
 	/// lokup of ASCII characters in encodable_ and gives about 1 sec
 	/// speedup on export of the Userguide.
@ -207,12 +215,6 @@ public:
 	 * \p c is a known character matching the preamble entry.
 	 */
 	static bool isKnownScriptChar(char_type const c, std::string & preamble);
-	/**
-	 * Do we have to output this character as LaTeX command in any case?
-	 * This is true if the "force" flag is set.
-	 * We need this if the inputencoding does not support a certain glyph.
-	 */
-	static bool isForced(char_type c);
 	/**
 	 * Do we have to display in italics this character when in mathmode?
 	 * This is true if the "mathalpha" flag is set. We use this for